From c2122b5c9f35efbd2079875e3d8563e5583078d3 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Tue, 8 Aug 2023 14:58:48 +0200 Subject: [PATCH 01/19] TASK: Implement Lexer --- scripts/test | 1 + .../Lexer/CharacterStream/CharacterStream.php | 84 ++ src/Language/Lexer/CharacterStream/Cursor.php | 61 ++ src/Language/Lexer/Lexer.php | 197 ++++ src/Language/Lexer/LexerException.php | 68 ++ .../Lexer/Matcher/Characters/Characters.php | 53 + src/Language/Lexer/Matcher/Exact/Exact.php | 48 + src/Language/Lexer/Matcher/Fixed/Fixed.php | 50 + src/Language/Lexer/Matcher/Matcher.php | 180 ++++ src/Language/Lexer/Matcher/Not/Not.php | 51 + src/Language/Lexer/Matcher/Result.php | 30 + .../Lexer/Matcher/Sequence/Sequence.php | 72 ++ src/Language/Lexer/Token/Token.php | 35 + src/Language/Lexer/Token/TokenType.php | 91 ++ src/Language/Lexer/Token/TokenTypes.php | 51 + src/Language/Util/DebugHelper.php | 120 +++ src/Parser/Source/Position.php | 5 + test/Unit/Language/Lexer/LexerTest.php | 942 ++++++++++++++++++ 18 files changed, 2139 insertions(+) create mode 100644 src/Language/Lexer/CharacterStream/CharacterStream.php create mode 100644 src/Language/Lexer/CharacterStream/Cursor.php create mode 100644 src/Language/Lexer/Lexer.php create mode 100644 src/Language/Lexer/LexerException.php create mode 100644 src/Language/Lexer/Matcher/Characters/Characters.php create mode 100644 src/Language/Lexer/Matcher/Exact/Exact.php create mode 100644 src/Language/Lexer/Matcher/Fixed/Fixed.php create mode 100644 src/Language/Lexer/Matcher/Matcher.php create mode 100644 src/Language/Lexer/Matcher/Not/Not.php create mode 100644 src/Language/Lexer/Matcher/Result.php create mode 100644 src/Language/Lexer/Matcher/Sequence/Sequence.php create mode 100644 src/Language/Lexer/Token/Token.php create mode 100644 src/Language/Lexer/Token/TokenType.php create mode 100644 src/Language/Lexer/Token/TokenTypes.php create mode 100644 src/Language/Util/DebugHelper.php create mode 100644 
/**
 * Walks over a source string one UTF-8 encoded character at a time while a
 * Cursor keeps track of the line/column position of the character under it.
 *
 * The first character is loaded by the constructor, so `current()` is usable
 * right after construction.
 */
final class CharacterStream
{
    /** Byte index of the next unread byte in $source. */
    private int $byte;
    private Cursor $cursor;
    /** The character currently under the cursor, or null at end of source. */
    private ?string $characterUnderCursor = null;

    public function __construct(private readonly string $source)
    {
        $this->byte = 0;
        $this->cursor = new Cursor();

        $this->next();
    }

    /**
     * Moves the cursor past the current character and decodes the next one.
     *
     * The number of bytes to read is derived from the lead byte. Well-formed
     * UTF-8 is decoded into whole characters. For malformed input the
     * sequence is cut short instead of reading past the end of the source or
     * swallowing a byte that is not a continuation byte (10xxxxxx); a stray
     * continuation byte is delivered as a character of its own.
     */
    public function next(): void
    {
        $this->cursor->advance($this->characterUnderCursor);

        $leadByte = $this->source[$this->byte] ?? null;
        if ($leadByte === null) {
            $this->characterUnderCursor = null;
            return;
        }
        $this->byte++;

        $ord = ord($leadByte);
        $expectedLength = match (true) {
            $ord >= 0xf0 => 4,
            $ord >= 0xe0 => 3,
            $ord >= 0xc0 => 2,
            default => 1,
        };

        $character = $leadByte;
        for ($index = 1; $index < $expectedLength; $index++) {
            $continuation = $this->source[$this->byte] ?? null;
            if ($continuation === null || (ord($continuation) & 0xc0) !== 0x80) {
                // Truncated or malformed sequence: keep what was read so far.
                break;
            }

            $character .= $continuation;
            $this->byte++;
        }

        $this->characterUnderCursor = $character;
    }

    /**
     * @return string|null The character under the cursor, null at end of source
     */
    public function current(): ?string
    {
        return $this->characterUnderCursor;
    }

    public function isEnd(): bool
    {
        return $this->characterUnderCursor === null;
    }

    public function getCurrentPosition(): Position
    {
        return $this->cursor->getCurrentPosition();
    }

    public function getPreviousPosition(): Position
    {
        return $this->cursor->getPreviousPosition();
    }
}
/**
 * Tracks the current and the previous line/column pair while characters are
 * consumed. Both counters start at 0; the "previous" pair stays at -1 until
 * the first non-null character has been passed to advance().
 */
final class Cursor
{
    private int $currentLineNumber = 0;
    private int $currentColumnNumber = 0;
    private int $previousLineNumber = -1;
    private int $previousColumnNumber = -1;

    /**
     * Moves the cursor past the given character. A null character leaves the
     * cursor untouched; a line feed starts a new line at column 0; any other
     * character moves one column to the right.
     */
    public function advance(?string $character): void
    {
        if ($character === null) {
            return;
        }

        $this->previousLineNumber = $this->currentLineNumber;
        $this->previousColumnNumber = $this->currentColumnNumber;

        if ($character !== "\n") {
            $this->currentColumnNumber++;
            return;
        }

        $this->currentLineNumber++;
        $this->currentColumnNumber = 0;
    }

    public function getCurrentPosition(): Position
    {
        return new Position(
            $this->currentLineNumber,
            $this->currentColumnNumber
        );
    }

    /**
     * Only meaningful after at least one character has been consumed, which
     * the assertions below check.
     */
    public function getPreviousPosition(): Position
    {
        assert($this->previousLineNumber >= 0);
        assert($this->previousColumnNumber >= 0);

        return new Position(
            $this->previousLineNumber,
            $this->previousColumnNumber
        );
    }
}
/**
 * Pull-style lexer: the caller states which token type(s) it expects next and
 * the lexer consumes characters from the CharacterStream until the matcher of
 * that token type is satisfied or cancels.
 *
 * Once a LexerException has been thrown it is remembered in $latestError and
 * every further public call (except isEnd()) asserts that no error occurred.
 */
final class Lexer
{
    private readonly CharacterStream $characterStream;
    /** Position at which the most recent read started. */
    private ?Position $startPosition = null;
    /** Number of characters consumed for the token being read. */
    private int $offset = 0;
    /** Characters consumed for the token being read. */
    private string $buffer = '';
    private ?TokenType $tokenTypeUnderCursor = null;
    /** Lazily created by getTokenUnderCursor(). */
    private ?Token $tokenUnderCursor = null;
    private ?LexerException $latestError = null;

    public function __construct(string $source)
    {
        $this->characterStream = new CharacterStream($source);
    }

    /**
     * Reads exactly one token of the given type.
     *
     * @throws LexerException if the source has ended or the upcoming
     *                        characters do not form a token of that type
     */
    public function read(TokenType $tokenType): void
    {
        assert($this->latestError === null);
        $this->startPosition = $this->characterStream->getCurrentPosition();

        if ($this->characterStream->isEnd()) {
            throw $this->latestError = LexerException::becauseOfUnexpectedEndOfSource(
                expectedTokenTypes: TokenTypes::from($tokenType),
                affectedRangeInSource: $this->startPosition->toRange()
            );
        }

        $this->resetTokenState();

        // Matcher::for() hands out one cached instance per token type, so the
        // lookup does not need to be repeated for every character.
        $matcher = Matcher::for($tokenType);

        while (true) {
            $character = $this->characterStream->current();
            $result = $matcher->match($character, $this->offset);

            if ($result === Result::SATISFIED) {
                $this->tokenTypeUnderCursor = $tokenType;
                return;
            }

            if ($result === Result::CANCEL) {
                throw $this->latestError = LexerException::becauseOfUnexpectedCharacterSequence(
                    expectedTokenTypes: TokenTypes::from($tokenType),
                    affectedRangeInSource: Range::from(
                        $this->startPosition,
                        $this->characterStream->getCurrentPosition()
                    ),
                    actualCharacterSequence: $this->buffer . $character
                );
            }

            // Result::KEEP: the character belongs to the token.
            $this->offset++;
            $this->buffer .= $character;
            $this->characterStream->next();
        }
    }

    /**
     * Reads one token of any of the given types. All candidates are fed the
     * same characters; the first candidate (in the order of $tokenTypes) that
     * reports SATISFIED wins, candidates that cancel are dropped.
     *
     * @throws LexerException if the source has ended or no candidate is left
     */
    public function readOneOf(TokenTypes $tokenTypes): void
    {
        assert($this->latestError === null);
        $this->startPosition = $this->characterStream->getCurrentPosition();

        if ($this->characterStream->isEnd()) {
            throw $this->latestError = LexerException::becauseOfUnexpectedEndOfSource(
                expectedTokenTypes: $tokenTypes,
                affectedRangeInSource: $this->startPosition->toRange()
            );
        }

        $this->resetTokenState();

        $candidates = $tokenTypes->items;
        while (count($candidates) > 0) {
            $character = $this->characterStream->current();

            $remainingCandidates = [];
            foreach ($candidates as $candidate) {
                $result = Matcher::for($candidate)->match($character, $this->offset);

                if ($result === Result::SATISFIED) {
                    $this->tokenTypeUnderCursor = $candidate;
                    return;
                }

                if ($result === Result::KEEP) {
                    $remainingCandidates[] = $candidate;
                }
            }

            // The character is consumed even if every candidate cancelled, so
            // that it shows up in the reported character sequence.
            $this->offset++;
            $this->buffer .= $character;
            $candidates = $remainingCandidates;
            $this->characterStream->next();
        }

        throw $this->latestError = LexerException::becauseOfUnexpectedCharacterSequence(
            expectedTokenTypes: $tokenTypes,
            affectedRangeInSource: Range::from(
                $this->startPosition,
                $this->characterStream->getPreviousPosition()
            ),
            actualCharacterSequence: $this->buffer
        );
    }

    /**
     * Consumes any run of SPACE and END_OF_LINE tokens.
     */
    public function skipSpace(): void
    {
        assert($this->latestError === null);
        $this->skip(TokenType::SPACE, TokenType::END_OF_LINE);
    }

    /**
     * Consumes any run of SPACE, END_OF_LINE and COMMENT tokens.
     */
    public function skipSpaceAndComments(): void
    {
        assert($this->latestError === null);
        $this->skip(TokenType::SPACE, TokenType::END_OF_LINE, TokenType::COMMENT);
    }

    /**
     * Keeps reading tokens of the given types for as long as the current
     * character is accepted as the first character of one of them.
     */
    private function skip(TokenType ...$tokenTypes): void
    {
        do {
            $hasSkipped = false;
            $character = $this->characterStream->current();

            foreach ($tokenTypes as $tokenType) {
                if (Matcher::for($tokenType)->match($character, 0) === Result::KEEP) {
                    $this->read($tokenType);
                    $hasSkipped = true;
                    break;
                }
            }
        } while ($hasSkipped);
    }

    /**
     * Clears everything that describes the previously read token.
     */
    private function resetTokenState(): void
    {
        $this->tokenTypeUnderCursor = null;
        $this->tokenUnderCursor = null;
        $this->offset = 0;
        $this->buffer = '';
    }

    /**
     * Returns the token produced by the last successful read. The Token
     * object is created on first access and reused afterwards.
     */
    public function getTokenUnderCursor(): Token
    {
        assert($this->latestError === null);
        assert($this->startPosition !== null);
        assert($this->tokenTypeUnderCursor !== null);

        return $this->tokenUnderCursor ??= new Token(
            rangeInSource: Range::from(
                $this->startPosition,
                $this->characterStream->getPreviousPosition()
            ),
            type: $this->tokenTypeUnderCursor,
            value: $this->buffer
        );
    }

    public function isEnd(): bool
    {
        return $this->characterStream->isEnd();
    }
}

/**
 * Thrown by the Lexer; carries the range of the source that caused the error.
 * Instances are created through the named constructors only.
 */
final class LexerException extends \Exception
{
    private function __construct(
        int $code,
        string $message,
        public readonly Range $affectedRangeInSource
    ) {
        parent::__construct($message, $code);
    }

    /**
     * The source ended although one of the given token types was expected.
     */
    public static function becauseOfUnexpectedEndOfSource(
        TokenTypes $expectedTokenTypes,
        Range $affectedRangeInSource
    ): self {
        return new self(
            code: 1691489789,
            message: sprintf(
                'Source ended unexpectedly. Expected %s instead.',
                DebugHelper::describeTokenTypes($expectedTokenTypes)
            ),
            affectedRangeInSource: $affectedRangeInSource
        );
    }

    /**
     * The given character sequence does not form any of the expected tokens.
     */
    public static function becauseOfUnexpectedCharacterSequence(
        TokenTypes $expectedTokenTypes,
        Range $affectedRangeInSource,
        string $actualCharacterSequence
    ): self {
        return new self(
            code: 1691575769,
            message: sprintf(
                'Unexpected character sequence "%s" was encountered. Expected %s instead.',
                $actualCharacterSequence,
                DebugHelper::describeTokenTypes($expectedTokenTypes)
            ),
            affectedRangeInSource: $affectedRangeInSource
        );
    }
}

/**
 * Matches a non-empty run of characters taken from a set of allowed
 * characters. Encountering one of the optional disallowed characters cancels
 * the match, even after allowed characters have already been consumed.
 */
final class Characters extends Matcher
{
    public function __construct(
        private readonly string $allowedCharacters,
        private readonly ?string $disallowedCharacters = null
    ) {
    }

    public function match(?string $character, int $offset): Result
    {
        // Explicit null checks: relying on truthiness would skip this guard
        // for the character "0" and for a disallowed set of "0", because PHP
        // treats the string "0" as false.
        if (
            $character !== null
            && $this->disallowedCharacters !== null
            && str_contains($this->disallowedCharacters, $character)
        ) {
            return Result::CANCEL;
        }

        if ($character !== null && str_contains($this->allowedCharacters, $character)) {
            return Result::KEEP;
        }

        // The run has ended: fine if at least one character was consumed.
        return $offset > 0 ? Result::SATISFIED : Result::CANCEL;
    }
}
/**
 * Matches one fixed keyword, character by character.
 *
 * The offset handed to match() counts characters, not bytes, so the keyword
 * is split into UTF-8 characters once up front. Indexing the raw string by
 * offset would compare single bytes against whole characters and could never
 * match a keyword containing multi-byte characters.
 */
final class Exact extends Matcher
{
    /**
     * @var string[] The keyword, one UTF-8 character per element
     */
    private readonly array $characters;
    private int $length;

    public function __construct(private readonly string $keyword)
    {
        $characters = preg_split('//u', $this->keyword, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when the keyword is not valid UTF-8; in
        // that case fall back to the byte-wise view of the keyword.
        $this->characters = $characters !== false
            ? $characters
            : str_split($this->keyword);
        $this->length = count($this->characters);
        assert($this->length > 0);
    }

    public function match(?string $character, int $offset): Result
    {
        return match (true) {
            $offset >= $this->length =>
                Result::SATISFIED,
            $this->characters[$offset] === $character =>
                Result::KEEP,
            default => Result::CANCEL
        };
    }
}

/**
 * Restricts an inner matcher to exactly $fixedLength characters: the match is
 * satisfied once that many characters were kept, and an inner matcher that
 * reports SATISFIED before that point cancels the match.
 */
final class Fixed extends Matcher
{
    public function __construct(
        private readonly int $fixedLength,
        private readonly Matcher $innerMatcher
    ) {
        assert($this->fixedLength > 0);
    }

    public function match(?string $character, int $offset): Result
    {
        if ($offset >= $this->fixedLength) {
            return Result::SATISFIED;
        }

        $innerResult = $this->innerMatcher->match($character, $offset);

        // The inner matcher ended too early for the required length.
        return $innerResult === Result::SATISFIED
            ? Result::CANCEL
            : $innerResult;
    }
}
/**
 * Base class of all matchers. A matcher is fed one character at a time,
 * together with the character offset inside the token being read, and
 * answers with a Result (KEEP, CANCEL or SATISFIED).
 */
abstract class Matcher
{
    /**
     * One matcher instance per token type, keyed by the token type's value.
     *
     * @var array<string,self>
     */
    private static $instancesByTokenType = [];

    /**
     * Returns the matcher for the given token type. The instance is created
     * on first use and shared by all later calls.
     */
    final public static function for(TokenType $tokenType): self
    {
        $key = $tokenType->value;

        if (!isset(self::$instancesByTokenType[$key])) {
            self::$instancesByTokenType[$key] = self::createFor($tokenType);
        }

        return self::$instancesByTokenType[$key];
    }

    /**
     * Builds a fresh matcher describing the lexical shape of a token type.
     */
    private static function createFor(TokenType $tokenType): self
    {
        return match ($tokenType) {
            TokenType::COMMENT => new Sequence(
                new Exact('#'),
                new Not(new Exact("\n"))
            ),

            TokenType::KEYWORD_FROM => new Exact('from'),
            TokenType::KEYWORD_IMPORT => new Exact('import'),
            TokenType::KEYWORD_EXPORT => new Exact('export'),
            TokenType::KEYWORD_ENUM => new Exact('enum'),
            TokenType::KEYWORD_STRUCT => new Exact('struct'),
            TokenType::KEYWORD_COMPONENT => new Exact('component'),
            TokenType::KEYWORD_MATCH => new Exact('match'),
            TokenType::KEYWORD_DEFAULT => new Exact('default'),
            TokenType::KEYWORD_RETURN => new Exact('return'),
            TokenType::KEYWORD_TRUE => new Exact('true'),
            TokenType::KEYWORD_FALSE => new Exact('false'),
            TokenType::KEYWORD_NULL => new Exact('null'),

            TokenType::STRING_LITERAL_DELIMITER => new Exact('"'),
            TokenType::STRING_LITERAL_CONTENT => new Not(
                new Characters('"\\' . "\n")
            ),

            TokenType::INTEGER_BINARY => new Sequence(
                new Exact('0b'),
                new Characters('01')
            ),
            TokenType::INTEGER_OCTAL => new Sequence(
                new Exact('0o'),
                new Characters('01234567')
            ),
            TokenType::INTEGER_DECIMAL => new Characters('0123456789', 'box'),
            TokenType::INTEGER_HEXADECIMAL => new Sequence(
                new Exact('0x'),
                new Characters('0123456789ABCDEF')
            ),

            TokenType::TEMPLATE_LITERAL_DELIMITER => new Exact('"""'),
            TokenType::TEMPLATE_LITERAL_CONTENT => new Not(
                new Characters('{}\\' . "\n")
            ),

            TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER => new Sequence(
                new Exact('\\'),
                new Fixed(1, new Characters('nrtvef\\$"'))
            ),
            TokenType::ESCAPE_SEQUENCE_HEXADECIMAL => new Sequence(
                new Exact('\\x'),
                new Fixed(2, new Characters('abcdefABCDEF0123456789'))
            ),
            TokenType::ESCAPE_SEQUENCE_UNICODE => new Sequence(
                new Exact('\\u'),
                new Fixed(4, new Characters('abcdefABCDEF0123456789'))
            ),
            TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT => new Sequence(
                new Exact('\\u{'),
                new Characters('abcdefABCDEF0123456789'),
                new Exact('}')
            ),

            TokenType::BRACKET_CURLY_OPEN => new Exact('{'),
            TokenType::BRACKET_CURLY_CLOSE => new Exact('}'),
            TokenType::BRACKET_ROUND_OPEN => new Exact('('),
            TokenType::BRACKET_ROUND_CLOSE => new Exact(')'),
            TokenType::BRACKET_SQUARE_OPEN => new Exact('['),
            TokenType::BRACKET_SQUARE_CLOSE => new Exact(']'),
            TokenType::BRACKET_ANGLE_OPEN => new Exact('<'),
            TokenType::BRACKET_ANGLE_CLOSE => new Exact('>'),

            TokenType::SYMBOL_COLON => new Exact(':'),
            TokenType::SYMBOL_PERIOD => new Exact('.'),
            TokenType::SYMBOL_QUESTIONMARK => new Exact('?'),
            TokenType::SYMBOL_EXCLAMATIONMARK => new Exact('!'),
            TokenType::SYMBOL_COMMA => new Exact(','),
            TokenType::SYMBOL_DASH => new Exact('-'),
            TokenType::SYMBOL_EQUALS => new Exact('='),
            TokenType::SYMBOL_SLASH_FORWARD => new Exact('/'),
            TokenType::SYMBOL_PIPE => new Exact('|'),
            TokenType::SYMBOL_BOOLEAN_AND => new Exact('&&'),
            TokenType::SYMBOL_BOOLEAN_OR => new Exact('||'),
            TokenType::SYMBOL_STRICT_EQUALs => new Exact('==='),
            TokenType::SYMBOL_NOT_EQUALs => new Exact('!=='),
            TokenType::SYMBOL_GREATER_THAN_OR_EQUAL => new Exact('>='),
            TokenType::SYMBOL_LESS_THAN_OR_EQUAL => new Exact('<='),
            TokenType::SYMBOL_ARROW_SINGLE => new Exact('->'),
            TokenType::SYMBOL_OPTCHAIN => new Exact('?.'),
            TokenType::SYMBOL_NULLISH_COALESCE => new Exact('??'),

            TokenType::WORD => new Characters(
                'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
            ),
            TokenType::TEXT => new Not(new Characters('<{}>')),

            TokenType::SPACE => new Characters(" \t"),
            TokenType::END_OF_LINE => new Exact("\n")
        };
    }

    /**
     * @param string|null $character The character under the cursor, null at end of source
     * @param int $offset Number of characters already consumed for the current token
     */
    abstract public function match(?string $character, int $offset): Result;
}
/**
 * Matches a non-empty run of characters up to (but excluding) the first
 * character that the inner matcher would accept as the start of a match.
 */
final class Not extends Matcher
{
    public function __construct(private readonly Matcher $innerMatcher)
    {
    }

    public function match(?string $character, int $offset): Result
    {
        // The inner matcher is always asked as if a new match started here.
        $innerAccepts = $this->innerMatcher->match($character, 0) === Result::KEEP;

        // End of source ends the run.
        if ($character === null) {
            return Result::SATISFIED;
        }

        if ($innerAccepts) {
            // An empty run is not a match; a non-empty run ends right here.
            return $offset === 0 ? Result::CANCEL : Result::SATISFIED;
        }

        return Result::KEEP;
    }
}

/**
 * Answer of a matcher for a single character.
 */
enum Result
{
    /** The character belongs to the token; continue with the next one. */
    case KEEP;
    /** The characters seen so far cannot form the token. */
    case CANCEL;
    /** The token is complete; the current character is not part of it. */
    case SATISFIED;
}
/**
 * Chains several matchers: when one of them is satisfied, the character that
 * satisfied it is handed to the next matcher as that matcher's first
 * character. The sequence is satisfied once its last matcher is satisfied.
 *
 * The matcher is stateful. Its progress is reset whenever match() is called
 * with offset 0.
 */
final class Sequence extends Matcher
{
    /** Offset at which the currently active matcher took over. */
    private int $lastStop = 0;
    /** Index of the currently active matcher. */
    private int $matcherIndex = 0;
    private int $numberOfMatchers;

    /**
     * @var Matcher[]
     */
    private readonly array $matchers;

    public function __construct(Matcher ...$matchers)
    {
        $this->matchers = $matchers;
        $this->numberOfMatchers = count($matchers);
        assert($this->numberOfMatchers > 0);
    }

    public function match(?string $character, int $offset): Result
    {
        if ($offset === 0) {
            $this->lastStop = 0;
            $this->matcherIndex = 0;
        }

        $activeMatcher = $this->matchers[$this->matcherIndex] ?? null;
        assert($activeMatcher !== null);

        // The active matcher sees offsets relative to where it took over.
        $result = $activeMatcher->match($character, $offset - $this->lastStop);
        if ($result !== Result::SATISFIED) {
            return $result;
        }

        $this->matcherIndex++;
        if ($this->matcherIndex === $this->numberOfMatchers) {
            return $result;
        }

        $this->lastStop = $offset;

        $followingMatcher = $this->matchers[$this->matcherIndex] ?? null;
        assert($followingMatcher !== null);

        return $followingMatcher->match($character, 0);
    }
}
/**
 * Immutable value object for a token: where it was found, which type it has
 * and the characters it consists of.
 */
final class Token
{
    public function __construct(
        public readonly Range $rangeInSource,
        public readonly TokenType $type,
        public readonly string $value
    ) {
    }
}

/**
 * All token types known to the lexer. Every case is backed by its own name.
 */
enum TokenType: string
{
    case COMMENT = 'COMMENT';

    // Keywords
    case KEYWORD_FROM = 'KEYWORD_FROM';
    case KEYWORD_IMPORT = 'KEYWORD_IMPORT';
    case KEYWORD_EXPORT = 'KEYWORD_EXPORT';
    case KEYWORD_ENUM = 'KEYWORD_ENUM';
    case KEYWORD_STRUCT = 'KEYWORD_STRUCT';
    case KEYWORD_COMPONENT = 'KEYWORD_COMPONENT';
    case KEYWORD_MATCH = 'KEYWORD_MATCH';
    case KEYWORD_DEFAULT = 'KEYWORD_DEFAULT';
    case KEYWORD_RETURN = 'KEYWORD_RETURN';
    case KEYWORD_TRUE = 'KEYWORD_TRUE';
    case KEYWORD_FALSE = 'KEYWORD_FALSE';
    case KEYWORD_NULL = 'KEYWORD_NULL';

    // String literals
    case STRING_LITERAL_DELIMITER = 'STRING_LITERAL_DELIMITER';
    case STRING_LITERAL_CONTENT = 'STRING_LITERAL_CONTENT';

    // Integer literals
    case INTEGER_BINARY = 'INTEGER_BINARY';
    case INTEGER_OCTAL = 'INTEGER_OCTAL';
    case INTEGER_DECIMAL = 'INTEGER_DECIMAL';
    case INTEGER_HEXADECIMAL = 'INTEGER_HEXADECIMAL';

    // Template literals
    case TEMPLATE_LITERAL_DELIMITER = 'TEMPLATE_LITERAL_DELIMITER';
    case TEMPLATE_LITERAL_CONTENT = 'TEMPLATE_LITERAL_CONTENT';

    // Escape sequences
    case ESCAPE_SEQUENCE_SINGLE_CHARACTER = 'ESCAPE_SEQUENCE_SINGLE_CHARACTER';
    case ESCAPE_SEQUENCE_HEXADECIMAL = 'ESCAPE_SEQUENCE_HEXADECIMAL';
    case ESCAPE_SEQUENCE_UNICODE = 'ESCAPE_SEQUENCE_UNICODE';
    case ESCAPE_SEQUENCE_UNICODE_CODEPOINT = 'ESCAPE_SEQUENCE_UNICODE_CODEPOINT';

    // Brackets
    case BRACKET_CURLY_OPEN = 'BRACKET_CURLY_OPEN';
    case BRACKET_CURLY_CLOSE = 'BRACKET_CURLY_CLOSE';
    case BRACKET_ROUND_OPEN = 'BRACKET_ROUND_OPEN';
    case BRACKET_ROUND_CLOSE = 'BRACKET_ROUND_CLOSE';
    case BRACKET_SQUARE_OPEN = 'BRACKET_SQUARE_OPEN';
    case BRACKET_SQUARE_CLOSE = 'BRACKET_SQUARE_CLOSE';
    case BRACKET_ANGLE_OPEN = 'BRACKET_ANGLE_OPEN';
    case BRACKET_ANGLE_CLOSE = 'BRACKET_ANGLE_CLOSE';

    // Symbols
    case SYMBOL_PERIOD = 'SYMBOL_PERIOD';
    case SYMBOL_COLON = 'SYMBOL_COLON';
    case SYMBOL_QUESTIONMARK = 'SYMBOL_QUESTIONMARK';
    case SYMBOL_EXCLAMATIONMARK = 'SYMBOL_EXCLAMATIONMARK';
    case SYMBOL_COMMA = 'SYMBOL_COMMA';
    case SYMBOL_DASH = 'SYMBOL_DASH';
    case SYMBOL_EQUALS = 'SYMBOL_EQUALS';
    case SYMBOL_SLASH_FORWARD = 'SYMBOL_SLASH_FORWARD';
    case SYMBOL_PIPE = 'SYMBOL_PIPE';
    case SYMBOL_BOOLEAN_AND = 'SYMBOL_BOOLEAN_AND';
    case SYMBOL_BOOLEAN_OR = 'SYMBOL_BOOLEAN_OR';
    case SYMBOL_STRICT_EQUALs = 'SYMBOL_STRICT_EQUALs';
    case SYMBOL_NOT_EQUALs = 'SYMBOL_NOT_EQUALs';
    case SYMBOL_GREATER_THAN_OR_EQUAL = 'SYMBOL_GREATER_THAN_OR_EQUAL';
    case SYMBOL_LESS_THAN_OR_EQUAL = 'SYMBOL_LESS_THAN_OR_EQUAL';
    case SYMBOL_ARROW_SINGLE = 'SYMBOL_ARROW_SINGLE';
    case SYMBOL_OPTCHAIN = 'SYMBOL_OPTCHAIN';
    case SYMBOL_NULLISH_COALESCE = 'SYMBOL_NULLISH_COALESCE';

    // Free-form content
    case WORD = 'WORD';
    case TEXT = 'TEXT';

    // Whitespace
    case SPACE = 'SPACE';
    case END_OF_LINE = 'END_OF_LINE';
}
/**
 * Non-empty, duplicate-free list of token types.
 */
final class TokenTypes
{
    /**
     * @var TokenType[]
     */
    public readonly array $items;

    private function __construct(TokenType ...$items)
    {
        assert(count($items) > 0);

        $this->items = $items;
    }

    /**
     * Creates the collection from the given token types. Duplicates are
     * removed; the order of first occurrence is kept.
     */
    public static function from(TokenType ...$items): self
    {
        // De-duplicate by the backing value of each case. This does not rely
        // on ordering comparisons, which are not defined for enum cases.
        $uniqueItems = [];
        foreach ($items as $item) {
            $uniqueItems[$item->value] ??= $item;
        }

        return new self(...array_values($uniqueItems));
    }

    public function contains(TokenType $needle): bool
    {
        return in_array($needle, $this->items, true);
    }
}

/**
 * Produces human-readable descriptions of tokens and token types, as used in
 * the messages of LexerException.
 */
final class DebugHelper
{
    /**
     * Describes a token type by its name, followed by the literal text or an
     * example of it where one exists.
     */
    public static function describeTokenType(TokenType $tokenType): string
    {
        return $tokenType->value . match ($tokenType) {
            TokenType::COMMENT => ' (e.g. "# ...")',

            TokenType::KEYWORD_FROM => ' ("from")',
            TokenType::KEYWORD_IMPORT => ' ("import")',
            TokenType::KEYWORD_EXPORT => ' ("export")',
            TokenType::KEYWORD_ENUM => ' ("enum")',
            TokenType::KEYWORD_STRUCT => ' ("struct")',
            TokenType::KEYWORD_COMPONENT => ' ("component")',
            TokenType::KEYWORD_MATCH => ' ("match")',
            TokenType::KEYWORD_DEFAULT => ' ("default")',
            TokenType::KEYWORD_RETURN => ' ("return")',
            TokenType::KEYWORD_TRUE => ' ("true")',
            TokenType::KEYWORD_FALSE => ' ("false")',
            TokenType::KEYWORD_NULL => ' ("null")',

            TokenType::STRING_LITERAL_DELIMITER => ' (""")',
            TokenType::STRING_LITERAL_CONTENT => '',

            TokenType::INTEGER_BINARY => ' (e.g. "0b1001")',
            TokenType::INTEGER_OCTAL => ' (e.g. "0o644")',
            TokenType::INTEGER_DECIMAL => ' (e.g. "42")',
            TokenType::INTEGER_HEXADECIMAL => ' (e.g. "0xABC")',

            TokenType::TEMPLATE_LITERAL_DELIMITER => ' (""""")',
            TokenType::TEMPLATE_LITERAL_CONTENT => '',

            TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER => ' (e.g. "\\\\" or "\\n")',
            TokenType::ESCAPE_SEQUENCE_HEXADECIMAL => ' (e.g. "\\xA9")',
            TokenType::ESCAPE_SEQUENCE_UNICODE => ' (e.g. "\\u00A9")',
            TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT => ' (e.g. "\\u{2F804}")',

            TokenType::BRACKET_CURLY_OPEN => ' ("{")',
            TokenType::BRACKET_CURLY_CLOSE => ' ("}")',
            TokenType::BRACKET_ROUND_OPEN => ' ("(")',
            TokenType::BRACKET_ROUND_CLOSE => ' (")")',
            TokenType::BRACKET_SQUARE_OPEN => ' ("[")',
            TokenType::BRACKET_SQUARE_CLOSE => ' ("]")',
            TokenType::BRACKET_ANGLE_OPEN => ' ("<")',
            TokenType::BRACKET_ANGLE_CLOSE => ' (">")',

            TokenType::SYMBOL_PERIOD => ' (".")',
            TokenType::SYMBOL_COLON => ' (":")',
            TokenType::SYMBOL_QUESTIONMARK => ' ("?")',
            TokenType::SYMBOL_EXCLAMATIONMARK => ' ("!")',
            TokenType::SYMBOL_COMMA => ' (",")',
            TokenType::SYMBOL_DASH => ' ("-")',
            TokenType::SYMBOL_EQUALS => ' ("=")',
            TokenType::SYMBOL_SLASH_FORWARD => ' ("/")',
            TokenType::SYMBOL_PIPE => ' ("|")',
            TokenType::SYMBOL_BOOLEAN_AND => ' ("&&")',
            TokenType::SYMBOL_BOOLEAN_OR => ' ("||")',
            TokenType::SYMBOL_STRICT_EQUALs => ' ("===")',
            TokenType::SYMBOL_NOT_EQUALs => ' ("!==")',
            TokenType::SYMBOL_GREATER_THAN_OR_EQUAL => ' (">=")',
            TokenType::SYMBOL_LESS_THAN_OR_EQUAL => ' ("<=")',
            TokenType::SYMBOL_ARROW_SINGLE => ' ("->")',
            TokenType::SYMBOL_OPTCHAIN => ' ("?.")',
            TokenType::SYMBOL_NULLISH_COALESCE => ' ("??")',

            TokenType::WORD => '',
            TokenType::TEXT => '',

            TokenType::SPACE => '',
            TokenType::END_OF_LINE => ''
        };
    }

    /**
     * Describes a list of token types as "A, B or C"; a single token type is
     * described on its own.
     */
    public static function describeTokenTypes(TokenTypes $tokenTypes): string
    {
        $descriptions = array_map(
            static fn (TokenType $tokenType): string => self::describeTokenType($tokenType),
            array_values($tokenTypes->items)
        );

        if (count($descriptions) === 1) {
            return $descriptions[0];
        }

        $trailingDescription = array_pop($descriptions);

        return implode(', ', $descriptions) . ' or ' . $trailingDescription;
    }

    /**
     * Describes a token by its type name and its value in quotes.
     */
    public static function describeToken(Token $token): string
    {
        return sprintf('%s ("%s")', $token->type->value, $token->value);
    }
}
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Test\Unit\Language\Lexer; + +use AssertionError; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; +use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Parser\Source\Position; +use PackageFactory\ComponentEngine\Parser\Source\Range; +use PHPUnit\Framework\TestCase; + +final class LexerTest extends TestCase +{ + /** + * @param array{int,int} $startAsArray + * @param array{int,int} $endAsArray + * @return Range + */ + protected static function range(array $startAsArray, array $endAsArray): Range + { + return Range::from( + new Position(...$startAsArray), + new Position(...$endAsArray) + ); + } + + protected function assertThrowsLexerException(callable $fn, LexerException $expectedLexerException): void + { + $this->expectExceptionObject($expectedLexerException); + + try { + $fn(); + } catch (LexerException $e) { + $this->assertEquals($expectedLexerException, $e); + throw $e; + } + } + + /** + * @return iterable + */ + public static function singleTokenExamples(): iterable + { + yield ($source = '# This is a comment') => + [$source, TokenType::COMMENT]; + yield ($source = '# 🌵🆚⌚️: Multi-byte characters are not a problem inside a comment.') => + [$source, TokenType::COMMENT]; + + yield ($source = 'from') => + [$source, TokenType::KEYWORD_FROM]; + yield ($source = 'import') => + [$source, TokenType::KEYWORD_IMPORT]; + yield ($source = 'export') => + [$source, TokenType::KEYWORD_EXPORT]; + yield ($source = 'enum') => + [$source, TokenType::KEYWORD_ENUM]; + yield ($source = 'struct') => + [$source, TokenType::KEYWORD_STRUCT]; + yield ($source = 'component') => + [$source, TokenType::KEYWORD_COMPONENT]; + yield ($source = 'match') => + 
[$source, TokenType::KEYWORD_MATCH]; + yield ($source = 'default') => + [$source, TokenType::KEYWORD_DEFAULT]; + yield ($source = 'return') => + [$source, TokenType::KEYWORD_RETURN]; + yield ($source = 'true') => + [$source, TokenType::KEYWORD_TRUE]; + yield ($source = 'false') => + [$source, TokenType::KEYWORD_FALSE]; + yield ($source = 'null') => + [$source, TokenType::KEYWORD_NULL]; + + yield ($source = '"') => + [$source, TokenType::STRING_LITERAL_DELIMITER]; + yield ($source = 'Some string without any escapes') => + [$source, TokenType::STRING_LITERAL_CONTENT]; + yield ($source = '🌵🆚⌚️: Multi-byte characters are not a problem inside a string.') => + [$source, TokenType::STRING_LITERAL_CONTENT]; + + yield ($source = '0b1001') => + [$source, TokenType::INTEGER_BINARY]; + yield ($source = '0o12345670') => + [$source, TokenType::INTEGER_OCTAL]; + yield ($source = '1234567890') => + [$source, TokenType::INTEGER_DECIMAL]; + yield ($source = '0xABCDEF1234567890') => + [$source, TokenType::INTEGER_HEXADECIMAL]; + + yield ($source = '"""') => + [$source, TokenType::TEMPLATE_LITERAL_DELIMITER]; + yield ($source = 'Some string without escapes') => + [$source, TokenType::TEMPLATE_LITERAL_CONTENT]; + yield ($source = '🌵🆚⌚️: Multi-byte characters are not a problem inside of template literals.') => + [$source, TokenType::TEMPLATE_LITERAL_CONTENT]; + + yield ($source = '\\\\') => + [$source, TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; + yield ($source = '\\n') => + [$source, TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; + yield ($source = '\\t') => + [$source, TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; + yield ($source = '\\xA9') => + [$source, TokenType::ESCAPE_SEQUENCE_HEXADECIMAL]; + yield ($source = '\\u00A9') => + [$source, TokenType::ESCAPE_SEQUENCE_UNICODE]; + yield ($source = '\\u{2F804}') => + [$source, TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT]; + + yield ($source = '{') => + [$source, TokenType::BRACKET_CURLY_OPEN]; + yield ($source = '}') => + [$source, 
TokenType::BRACKET_CURLY_CLOSE]; + yield ($source = '(') => + [$source, TokenType::BRACKET_ROUND_OPEN]; + yield ($source = ')') => + [$source, TokenType::BRACKET_ROUND_CLOSE]; + yield ($source = '[') => + [$source, TokenType::BRACKET_SQUARE_OPEN]; + yield ($source = ']') => + [$source, TokenType::BRACKET_SQUARE_CLOSE]; + yield ($source = '<') => + [$source, TokenType::BRACKET_ANGLE_OPEN]; + yield ($source = '>') => + [$source, TokenType::BRACKET_ANGLE_CLOSE]; + + yield ($source = '.') => + [$source, TokenType::SYMBOL_PERIOD]; + yield ($source = ':') => + [$source, TokenType::SYMBOL_COLON]; + yield ($source = '?') => + [$source, TokenType::SYMBOL_QUESTIONMARK]; + yield ($source = '!') => + [$source, TokenType::SYMBOL_EXCLAMATIONMARK]; + yield ($source = ',') => + [$source, TokenType::SYMBOL_COMMA]; + yield ($source = '-') => + [$source, TokenType::SYMBOL_DASH]; + yield ($source = '=') => + [$source, TokenType::SYMBOL_EQUALS]; + yield ($source = '/') => + [$source, TokenType::SYMBOL_SLASH_FORWARD]; + yield ($source = '|') => + [$source, TokenType::SYMBOL_PIPE]; + yield ($source = '&&') => + [$source, TokenType::SYMBOL_BOOLEAN_AND]; + yield ($source = '||') => + [$source, TokenType::SYMBOL_BOOLEAN_OR]; + yield ($source = '===') => + [$source, TokenType::SYMBOL_STRICT_EQUALs]; + yield ($source = '!==') => + [$source, TokenType::SYMBOL_NOT_EQUALs]; + yield ($source = '>=') => + [$source, TokenType::SYMBOL_GREATER_THAN_OR_EQUAL]; + yield ($source = '<=') => + [$source, TokenType::SYMBOL_LESS_THAN_OR_EQUAL]; + yield ($source = '->') => + [$source, TokenType::SYMBOL_ARROW_SINGLE]; + yield ($source = '?.') => + [$source, TokenType::SYMBOL_OPTCHAIN]; + yield ($source = '??') => + [$source, TokenType::SYMBOL_NULLISH_COALESCE]; + + yield ($source = 'ValidWord') => + [$source, TokenType::WORD]; + yield ($source = 'V4l1dW0rd') => + [$source, TokenType::WORD]; + yield ($source = '1245ValidWord') => + [$source, TokenType::WORD]; + + yield ($source = 'Just some text. 
Nothing special.') => + [$source, TokenType::TEXT]; + yield ($source = '🌵🆚⌚️: Multi-byte characters are not a problem inside of text.') => + [$source, TokenType::TEXT]; + + yield ($source = ' ') => + [$source, TokenType::SPACE]; + yield ($source = ' ') => + [$source, TokenType::SPACE]; + yield ($source = "\t") => + [$source, TokenType::SPACE]; + yield ($source = "\t\t\t\t") => + [$source, TokenType::SPACE]; + yield ($source = " \t \t \t \t ") => + [$source, TokenType::SPACE]; + + yield ($source = "\n") => + [$source, TokenType::END_OF_LINE]; + } + + /** + * @dataProvider singleTokenExamples + * @test + * @param string $source + * @param TokenType $expectedTokenType + * @return void + */ + public function readsSingleToken(string $source, TokenType $expectedTokenType): void + { + $lexer = new Lexer($source); + $lexer->read($expectedTokenType); + + $this->assertEquals( + new Token( + rangeInSource: self::range([0, 0], [0, \mb_strlen($source) - 1]), + type: $expectedTokenType, + value: $source + ), + $lexer->getTokenUnderCursor() + ); + } + + /** + * @return iterable + */ + public static function multipleTokensExamples(): iterable + { + yield ($source = "# This is a comment\n# This is also a comment") => [ + $source, + TokenTypes::from(TokenType::COMMENT, TokenType::END_OF_LINE), + new Token(self::range([0, 0], [0, 18]), TokenType::COMMENT, '# This is a comment'), + new Token(self::range([0, 19], [0, 19]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([1, 0], [1, 23]), TokenType::COMMENT, '# This is also a comment') + ]; + + yield ($source = "1765224, -0xAB89CD, true\nnull") => [ + $source, + TokenTypes::from( + TokenType::SYMBOL_DASH, + TokenType::SYMBOL_COMMA, + TokenType::INTEGER_HEXADECIMAL, + TokenType::INTEGER_DECIMAL, + TokenType::SPACE, + TokenType::END_OF_LINE, + TokenType::KEYWORD_TRUE, + TokenType::KEYWORD_NULL + ), + new Token(self::range([0, 0], [0, 6]), TokenType::INTEGER_DECIMAL, '1765224'), + new Token(self::range([0, 7], [0, 7]), 
TokenType::SYMBOL_COMMA, ','), + new Token(self::range([0, 8], [0, 8]), TokenType::SPACE, ' '), + new Token(self::range([0, 9], [0, 9]), TokenType::SYMBOL_DASH, '-'), + new Token(self::range([0, 10], [0, 17]), TokenType::INTEGER_HEXADECIMAL, '0xAB89CD'), + new Token(self::range([0, 18], [0, 18]), TokenType::SYMBOL_COMMA, ','), + new Token(self::range([0, 19], [0, 19]), TokenType::SPACE, ' '), + new Token(self::range([0, 20], [0, 23]), TokenType::KEYWORD_TRUE, 'true'), + new Token(self::range([0, 24], [0, 24]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([1, 0], [1, 3]), TokenType::KEYWORD_NULL, 'null') + ]; + + yield ($source = '0b100101 892837 0xFFAAEE 0o75374') => [ + $source, + TokenTypes::from( + TokenType::INTEGER_BINARY, + TokenType::INTEGER_OCTAL, + TokenType::INTEGER_HEXADECIMAL, + TokenType::INTEGER_DECIMAL, + TokenType::SPACE + ), + new Token(self::range([0, 0], [0, 7]), TokenType::INTEGER_BINARY, '0b100101'), + new Token(self::range([0, 8], [0, 8]), TokenType::SPACE, ' '), + new Token(self::range([0, 9], [0, 14]), TokenType::INTEGER_DECIMAL, '892837'), + new Token(self::range([0, 15], [0, 15]), TokenType::SPACE, ' '), + new Token(self::range([0, 16], [0, 23]), TokenType::INTEGER_HEXADECIMAL, '0xFFAAEE'), + new Token(self::range([0, 24], [0, 24]), TokenType::SPACE, ' '), + new Token(self::range([0, 25], [0, 31]), TokenType::INTEGER_OCTAL, '0o75374'), + ]; + + yield ($source = '"This is a string literal with \\n escapes \\xB1 \\u5FA9 \\u{1343E}!"') => [ + $source, + TokenTypes::from( + TokenType::STRING_LITERAL_DELIMITER, + TokenType::STRING_LITERAL_CONTENT, + TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, + TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, + TokenType::ESCAPE_SEQUENCE_UNICODE, + TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT + ), + new Token(self::range([0, 0], [0, 0]), TokenType::STRING_LITERAL_DELIMITER, '"'), + new Token(self::range([0, 1], [0, 30]), TokenType::STRING_LITERAL_CONTENT, 'This is a string literal with '), + new 
Token(self::range([0, 31], [0, 32]), TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'), + new Token(self::range([0, 33], [0, 41]), TokenType::STRING_LITERAL_CONTENT, ' escapes '), + new Token(self::range([0, 42], [0, 45]), TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, '\\xB1'), + new Token(self::range([0, 46], [0, 46]), TokenType::STRING_LITERAL_CONTENT, ' '), + new Token(self::range([0, 47], [0, 52]), TokenType::ESCAPE_SEQUENCE_UNICODE, '\\u5FA9'), + new Token(self::range([0, 53], [0, 53]), TokenType::STRING_LITERAL_CONTENT, ' '), + new Token(self::range([0, 54], [0, 62]), TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{1343E}'), + new Token(self::range([0, 63], [0, 63]), TokenType::STRING_LITERAL_CONTENT, '!'), + new Token(self::range([0, 64], [0, 64]), TokenType::STRING_LITERAL_DELIMITER, '"') + ]; + + $source = << [ + $source, + TokenTypes::from( + TokenType::TEMPLATE_LITERAL_DELIMITER, + TokenType::SPACE, + TokenType::TEMPLATE_LITERAL_CONTENT, + TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, + TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, + TokenType::ESCAPE_SEQUENCE_UNICODE, + TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, + TokenType::END_OF_LINE, + TokenType::BRACKET_CURLY_OPEN, + TokenType::BRACKET_CURLY_CLOSE + ), + new Token(self::range([0, 0], [0, 2]), TokenType::TEMPLATE_LITERAL_DELIMITER, '"""'), + new Token(self::range([0, 3], [0, 3]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([1, 0], [1, 3]), TokenType::SPACE, ' '), + new Token(self::range([1, 4], [1, 43]), TokenType::TEMPLATE_LITERAL_CONTENT, 'This is "template literal" content with '), + new Token(self::range([1, 44], [1, 45]), TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'), + new Token(self::range([1, 46], [1, 46]), TokenType::SPACE, ' '), + new Token(self::range([1, 47], [1, 54]), TokenType::TEMPLATE_LITERAL_CONTENT, 'escapes '), + new Token(self::range([1, 55], [1, 58]), TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, '\\xB1'), + new Token(self::range([1, 59], [1, 59]), TokenType::SPACE, ' '), 
+ new Token(self::range([1, 60], [1, 65]), TokenType::ESCAPE_SEQUENCE_UNICODE, '\\u5FA9'), + new Token(self::range([1, 66], [1, 66]), TokenType::SPACE, ' '), + new Token(self::range([1, 67], [1, 75]), TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{1343E}'), + new Token(self::range([1, 76], [1, 76]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([2, 0], [2, 3]), TokenType::SPACE, ' '), + new Token(self::range([2, 4], [2, 29]), TokenType::TEMPLATE_LITERAL_CONTENT, 'and embedded expressions: '), + new Token(self::range([2, 30], [2, 30]), TokenType::BRACKET_CURLY_OPEN, '{'), + new Token(self::range([2, 31], [2, 31]), TokenType::BRACKET_CURLY_CLOSE, '}'), + new Token(self::range([2, 32], [2, 32]), TokenType::SPACE, ' '), + new Token(self::range([2, 33], [2, 34]), TokenType::TEMPLATE_LITERAL_CONTENT, ':)'), + new Token(self::range([2, 35], [2, 35]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([3, 0], [3, 3]), TokenType::SPACE, ' '), + new Token(self::range([3, 4], [3, 6]), TokenType::TEMPLATE_LITERAL_DELIMITER, '"""'), + ]; + + $source = << + +
+
+ + AFX; + yield $source => [ + $source, + TokenTypes::from( + TokenType::BRACKET_ANGLE_OPEN, + TokenType::WORD, + TokenType::SPACE, + TokenType::SYMBOL_EQUALS, + TokenType::STRING_LITERAL_DELIMITER, + TokenType::BRACKET_ANGLE_CLOSE, + TokenType::END_OF_LINE, + TokenType::SYMBOL_DASH, + TokenType::SYMBOL_SLASH_FORWARD, + TokenType::BRACKET_CURLY_OPEN, + TokenType::BRACKET_CURLY_CLOSE, + TokenType::SYMBOL_COLON + ), + new Token(self::range([0, 0], [0, 0]), TokenType::BRACKET_ANGLE_OPEN, '<'), + new Token(self::range([0, 1], [0, 1]), TokenType::WORD, 'a'), + new Token(self::range([0, 2], [0, 2]), TokenType::SPACE, ' '), + new Token(self::range([0, 3], [0, 6]), TokenType::WORD, 'href'), + new Token(self::range([0, 7], [0, 7]), TokenType::SYMBOL_EQUALS, '='), + new Token(self::range([0, 8], [0, 8]), TokenType::STRING_LITERAL_DELIMITER, '"'), + new Token(self::range([0, 9], [0, 9]), TokenType::STRING_LITERAL_DELIMITER, '"'), + new Token(self::range([0, 10], [0, 10]), TokenType::BRACKET_ANGLE_CLOSE, '>'), + new Token(self::range([0, 11], [0, 11]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([1, 0], [1, 3]), TokenType::SPACE, ' '), + new Token(self::range([1, 4], [1, 4]), TokenType::BRACKET_ANGLE_OPEN, '<'), + new Token(self::range([1, 5], [1, 6]), TokenType::WORD, 'my'), + new Token(self::range([1, 7], [1, 7]), TokenType::SYMBOL_DASH, '-'), + new Token(self::range([1, 8], [1, 14]), TokenType::WORD, 'element'), + new Token(self::range([1, 15], [1, 15]), TokenType::SYMBOL_SLASH_FORWARD, '/'), + new Token(self::range([1, 16], [1, 16]), TokenType::BRACKET_ANGLE_CLOSE, '>'), + new Token(self::range([1, 17], [1, 17]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([2, 0], [2, 3]), TokenType::SPACE, ' '), + new Token(self::range([2, 4], [2, 4]), TokenType::BRACKET_ANGLE_OPEN, '<'), + new Token(self::range([2, 5], [2, 7]), TokenType::WORD, 'div'), + new Token(self::range([2, 8], [2, 8]), TokenType::SPACE, ' '), + new Token(self::range([2, 9], [2, 13]), 
TokenType::WORD, 'class'), + new Token(self::range([2, 14], [2, 14]), TokenType::SYMBOL_EQUALS, '='), + new Token(self::range([2, 15], [2, 15]), TokenType::BRACKET_CURLY_OPEN, '{'), + new Token(self::range([2, 16], [2, 16]), TokenType::BRACKET_CURLY_CLOSE, '}'), + new Token(self::range([2, 17], [2, 17]), TokenType::SPACE, ' '), + new Token(self::range([2, 18], [2, 20]), TokenType::WORD, 'foo'), + new Token(self::range([2, 21], [2, 21]), TokenType::SYMBOL_COLON, ':'), + new Token(self::range([2, 22], [2, 24]), TokenType::WORD, 'bar'), + new Token(self::range([2, 25], [2, 25]), TokenType::BRACKET_ANGLE_CLOSE, '>'), + new Token(self::range([2, 26], [2, 26]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([3, 0], [3, 3]), TokenType::SPACE, ' '), + new Token(self::range([3, 4], [3, 4]), TokenType::BRACKET_ANGLE_OPEN, '<'), + new Token(self::range([3, 5], [3, 5]), TokenType::SYMBOL_SLASH_FORWARD, '/'), + new Token(self::range([3, 6], [3, 8]), TokenType::WORD, 'div'), + new Token(self::range([3, 9], [3, 9]), TokenType::BRACKET_ANGLE_CLOSE, '>'), + new Token(self::range([3, 10], [3, 10]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([4, 0], [4, 0]), TokenType::BRACKET_ANGLE_OPEN, '<'), + new Token(self::range([4, 1], [4, 1]), TokenType::SYMBOL_SLASH_FORWARD, '/'), + new Token(self::range([4, 2], [4, 2]), TokenType::WORD, 'a'), + new Token(self::range([4, 3], [4, 3]), TokenType::BRACKET_ANGLE_CLOSE, '>'), + ]; + + $source = << inside. 
+ AFX; + yield $source => [ + $source, + TokenTypes::from( + TokenType::TEXT, + TokenType::BRACKET_CURLY_OPEN, + TokenType::BRACKET_CURLY_CLOSE, + TokenType::END_OF_LINE, + TokenType::BRACKET_ANGLE_OPEN, + TokenType::BRACKET_ANGLE_CLOSE + ), + new Token(self::range([0, 0], [0, 34]), TokenType::TEXT, 'This is some text with expressions '), + new Token(self::range([0, 35], [0, 35]), TokenType::BRACKET_CURLY_OPEN, '{'), + new Token(self::range([0, 36], [0, 36]), TokenType::BRACKET_CURLY_CLOSE, '}'), + new Token(self::range([0, 37], [0, 37]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([1, 0], [1, 8]), TokenType::TEXT, 'and tags '), + new Token(self::range([1, 9], [1, 9]), TokenType::BRACKET_ANGLE_OPEN, '<'), + new Token(self::range([1, 10], [1, 10]), TokenType::BRACKET_ANGLE_CLOSE, '>'), + new Token(self::range([1, 11], [1, 18]), TokenType::TEXT, ' inside.'), + ]; + } + + /** + * @dataProvider multipleTokensExamples + * @test + * @param string $source + * @param Token ...$expectedTokens + * @return void + */ + public function readsMultipleTokens( + string $source, + TokenTypes $tokenTypes, + Token ...$expectedTokens + ): void { + $lexer = new Lexer($source); + + $actualTokens = []; + foreach ($expectedTokens as $token) { + $lexer->readOneOf($tokenTypes); + $actualTokens[] = $lexer->getTokenUnderCursor(); + } + + $this->assertEquals($expectedTokens, $actualTokens); + } + + /** + * @return iterable + */ + public static function failingSingleTokenExamples(): iterable + { + $example = static function ( + TokenType $type, + string $source, + string $unexpectedCharacterSequence + ) { + yield sprintf('%s: %s', $type->value, $source) => [ + $source, + $type, + self::range([0, 0], [0, \mb_strlen($unexpectedCharacterSequence) - 1]), + $unexpectedCharacterSequence + ]; + }; + + yield from $example(TokenType::COMMENT, 'Anything that does not start with "#"', 'A'); + + yield from $example(TokenType::KEYWORD_FROM, 'false', 'fa'); + yield from 
$example(TokenType::KEYWORD_IMPORT, 'implausible', 'impl'); + yield from $example(TokenType::KEYWORD_EXPORT, 'ex-machina', 'ex-'); + yield from $example(TokenType::KEYWORD_ENUM, 'enough', 'eno'); + yield from $example(TokenType::KEYWORD_STRUCT, 'strict', 'stri'); + yield from $example(TokenType::KEYWORD_COMPONENT, 'composition', 'compos'); + yield from $example(TokenType::KEYWORD_MATCH, 'matter', 'matt'); + yield from $example(TokenType::KEYWORD_DEFAULT, 'definition', 'defi'); + yield from $example(TokenType::KEYWORD_RETURN, 'retroactive', 'retr'); + yield from $example(TokenType::KEYWORD_TRUE, 'truth', 'trut'); + yield from $example(TokenType::KEYWORD_FALSE, 'falsify', 'falsi'); + yield from $example(TokenType::KEYWORD_NULL, 'nuclear', 'nuc'); + + yield from $example(TokenType::STRING_LITERAL_DELIMITER, '\'', '\''); + yield from $example(TokenType::STRING_LITERAL_CONTENT, '"', '"'); + yield from $example(TokenType::STRING_LITERAL_CONTENT, "\n", "\n"); + yield from $example(TokenType::STRING_LITERAL_CONTENT, '\\', '\\'); + + yield from $example(TokenType::INTEGER_BINARY, '001001', '00'); + yield from $example(TokenType::INTEGER_BINARY, '0b21', '0b2'); + yield from $example(TokenType::INTEGER_OCTAL, '0p12345670', '0p'); + yield from $example(TokenType::INTEGER_OCTAL, '0o84', '0o8'); + yield from $example(TokenType::INTEGER_DECIMAL, ' ', ' '); + yield from $example(TokenType::INTEGER_DECIMAL, 'foo', 'f'); + yield from $example(TokenType::INTEGER_HEXADECIMAL, '0xG', '0xG'); + yield from $example(TokenType::INTEGER_HEXADECIMAL, '0yFFAA00', '0y'); + + yield from $example(TokenType::TEMPLATE_LITERAL_DELIMITER, '`', '`'); + yield from $example(TokenType::TEMPLATE_LITERAL_CONTENT, '{', '{'); + yield from $example(TokenType::TEMPLATE_LITERAL_CONTENT, '}', '}'); + yield from $example(TokenType::TEMPLATE_LITERAL_CONTENT, "\n", "\n"); + yield from $example(TokenType::TEMPLATE_LITERAL_CONTENT, '\\', '\\'); + + yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, 
'\\o', '\\o'); + yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\ü', '\\ü'); + yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\£', '\\£'); + yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\И', '\\И'); + yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\ह', '\\ह'); + yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\€', '\\€'); + yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\𐍈', '\\𐍈'); + yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\8', '\\8'); + yield from $example(TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, '\\x9G', '\\x9G'); + yield from $example(TokenType::ESCAPE_SEQUENCE_UNICODE, '\\u00AY', '\\u00AY'); + yield from $example(TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{2F8O4}', '\\u{2F8O'); + + yield from $example(TokenType::BRACKET_CURLY_OPEN, 'a', 'a'); + yield from $example(TokenType::BRACKET_CURLY_OPEN, '😱', '😱'); + yield from $example(TokenType::BRACKET_CURLY_CLOSE, 'b', 'b'); + yield from $example(TokenType::BRACKET_CURLY_CLOSE, '🖖', '🖖'); + yield from $example(TokenType::BRACKET_ROUND_OPEN, 'c', 'c'); + yield from $example(TokenType::BRACKET_ROUND_OPEN, '🌈', '🌈'); + yield from $example(TokenType::BRACKET_ROUND_CLOSE, 'd', 'd'); + yield from $example(TokenType::BRACKET_ROUND_CLOSE, '⚓', '⚓'); + yield from $example(TokenType::BRACKET_SQUARE_OPEN, 'e', 'e'); + yield from $example(TokenType::BRACKET_SQUARE_OPEN, '☘', '☘'); + yield from $example(TokenType::BRACKET_SQUARE_CLOSE, 'f', 'f'); + yield from $example(TokenType::BRACKET_SQUARE_CLOSE, '🎷', '🎷'); + yield from $example(TokenType::BRACKET_ANGLE_OPEN, 'g', 'g'); + yield from $example(TokenType::BRACKET_ANGLE_OPEN, '🐒', '🐒'); + yield from $example(TokenType::BRACKET_ANGLE_CLOSE, 'h', 'h'); + yield from $example(TokenType::BRACKET_ANGLE_CLOSE, '💡', '💡'); + + yield from $example(TokenType::SYMBOL_PERIOD, 'i', 'i'); + yield from $example(TokenType::SYMBOL_PERIOD, '?.', 
'?'); + yield from $example(TokenType::SYMBOL_COLON, '-', '-'); + yield from $example(TokenType::SYMBOL_COLON, '➗', '➗'); + yield from $example(TokenType::SYMBOL_QUESTIONMARK, '❓', '❓'); + yield from $example(TokenType::SYMBOL_EXCLAMATIONMARK, '❗', '❗'); + yield from $example(TokenType::SYMBOL_COMMA, '.', '.'); + yield from $example(TokenType::SYMBOL_DASH, '➖', '➖'); + yield from $example(TokenType::SYMBOL_EQUALS, '<=', '<'); + yield from $example(TokenType::SYMBOL_SLASH_FORWARD, '\\', '\\'); + yield from $example(TokenType::SYMBOL_PIPE, '🌵', '🌵'); + yield from $example(TokenType::SYMBOL_BOOLEAN_AND, '§§', '§'); + yield from $example(TokenType::SYMBOL_BOOLEAN_OR, '//', '/'); + yield from $example(TokenType::SYMBOL_STRICT_EQUALs, '!==', '!'); + yield from $example(TokenType::SYMBOL_NOT_EQUALs, '===', '='); + yield from $example(TokenType::SYMBOL_GREATER_THAN_OR_EQUAL, '=>', '='); + yield from $example(TokenType::SYMBOL_LESS_THAN_OR_EQUAL, '=<', '='); + yield from $example(TokenType::SYMBOL_ARROW_SINGLE, '=>', '='); + yield from $example(TokenType::SYMBOL_OPTCHAIN, '??', '??'); + yield from $example(TokenType::SYMBOL_NULLISH_COALESCE, '?.', '?.'); + + yield from $example(TokenType::WORD, '!NotAValidWord', '!'); + yield from $example(TokenType::WORD, '?N0t4V4l1dW0rd', '?'); + yield from $example(TokenType::WORD, '...1245NotAValidWord', '.'); + + yield from $example(TokenType::TEXT, '<', '<'); + yield from $example(TokenType::TEXT, '>', '>'); + yield from $example(TokenType::TEXT, '{', '{'); + yield from $example(TokenType::TEXT, '}', '}'); + + yield from $example(TokenType::SPACE, '{', '{'); + yield from $example(TokenType::SPACE, '}', '}'); + yield from $example(TokenType::SPACE, '💡', '💡'); + yield from $example(TokenType::SPACE, 'Anything but space', 'A'); + + yield from $example(TokenType::END_OF_LINE, '{', '{'); + yield from $example(TokenType::END_OF_LINE, '}', '}'); + yield from $example(TokenType::END_OF_LINE, '💡', '💡'); + yield from 
$example(TokenType::END_OF_LINE, 'Anything but \\n', 'A'); + } + + /** + * @dataProvider failingSingleTokenExamples + * @test + * @param string $source + * @param TokenType $expectedTokenType + * @param Range $affectedRangeInSource + * @param string $actualTokenValue + * @return void + */ + public function throwsIfCharacterSequenceDoesNotMatchSingleTokenType( + string $source, + TokenType $expectedTokenType, + Range $affectedRangeInSource, + string $actualTokenValue + ): void { + $this->assertThrowsLexerException( + function () use ($source, $expectedTokenType) { + $lexer = new Lexer($source); + $lexer->read($expectedTokenType); + }, + LexerException::becauseOfUnexpectedCharacterSequence( + expectedTokenTypes: TokenTypes::from($expectedTokenType), + affectedRangeInSource: $affectedRangeInSource, + actualCharacterSequence: $actualTokenValue + ) + ); + } + + /** + * @return iterable + */ + public static function failingMultipleTokensExamples(): iterable + { + yield ($source = "# This is a comment\nThis is not a comment") => [ + $source, + $tokenTypes = TokenTypes::from(TokenType::COMMENT, TokenType::END_OF_LINE), + 3, + LexerException::becauseOfUnexpectedCharacterSequence( + expectedTokenTypes: $tokenTypes, + affectedRangeInSource: self::range([1, 0], [1, 0]), + actualCharacterSequence: 'T' + ) + ]; + } + + /** + * Calls readOneOf() at most $numberOfReadOperations times and asserts that the expected LexerException is thrown. + * + * @dataProvider failingMultipleTokensExamples + * @test + * @param string $source + * @param TokenTypes $tokenTypes + * @param int $numberOfReadOperations upper bound on the number of readOneOf() calls + * @param LexerException $expectedLexerException + * @return void + */ + public function throwsIfCharacterSequenceDoesNotMatchMultipleTokenTypes( + string $source, + TokenTypes $tokenTypes, + int $numberOfReadOperations, + LexerException $expectedLexerException + ): void { + $this->assertThrowsLexerException( + function () use ($source, $tokenTypes, $numberOfReadOperations) { + $lexer = new Lexer($source); + + // A counted loop instead of range(0, N): range() includes both bounds and would read N + 1 times. + for ($i = 0; $i < $numberOfReadOperations; $i++) { + $lexer->readOneOf($tokenTypes); + } + 
}, + $expectedLexerException + ); + } + + /** + * Reading a single token type from an exhausted source must raise the "unexpected end of source" LexerException. + * + * @test + */ + public function throwsIfSourceEndsUnexpectedlyWhileReadingASingleTokenType(): void + { + $this->assertThrowsLexerException( + function () { + $lexer = new Lexer(''); + $lexer->read(TokenType::KEYWORD_NULL); + }, + LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: TokenTypes::from(TokenType::KEYWORD_NULL), + affectedRangeInSource: self::range([0, 0], [0, 0]) + ) + ); + + // NOTE(review): assertThrowsLexerException() rethrows the LexerException it catches, so whenever the + // scenario above throws as expected, control leaves this method and the scenario below never runs. + // It probably belongs in its own test method. Its expected range ([0, 0] to [0, 4]) has therefore never + // been exercised and differs in shape from the [0, 6] to [0, 6] expected for 'return' in + // multipleTokenTypeUnexpectedEndOfSourceExamples() - confirm against the Lexer before enabling it. + $this->assertThrowsLexerException( + function () { + $lexer = new Lexer('null'); + $lexer->read(TokenType::KEYWORD_NULL); + $lexer->read(TokenType::KEYWORD_NULL); + }, + LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: TokenTypes::from(TokenType::KEYWORD_NULL), + affectedRangeInSource: self::range([0, 0], [0, 4]) + ) + ); + } + + /** + * Each dataset yields: the source, the accepted token types, a number of read operations and the expected exception. + * + * @return iterable + */ + public static function multipleTokenTypeUnexpectedEndOfSourceExamples(): iterable + { + yield ($source = '') => [ + $source, + $tokenTypes = TokenTypes::from( + TokenType::KEYWORD_RETURN, + TokenType::KEYWORD_NULL, + TokenType::SPACE + ), + 1, + LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: $tokenTypes, + affectedRangeInSource: self::range([0, 0], [0, 0]) + ) + ]; + + yield ($source = 'return') => [ + $source, + $tokenTypes = TokenTypes::from( + TokenType::KEYWORD_RETURN, + TokenType::KEYWORD_NULL, + TokenType::SPACE + ), + 2, + LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: $tokenTypes, + affectedRangeInSource: self::range([0, 6], [0, 6]) + ) + ]; + + yield ($source = 'return ') => [ + $source, + $tokenTypes = TokenTypes::from( + TokenType::KEYWORD_RETURN, + TokenType::KEYWORD_NULL, + TokenType::SPACE + ), + 3, + LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: $tokenTypes, + affectedRangeInSource: self::range([0, 7], [0, 7]) + ) + ]; + } + + /** + * @dataProvider multipleTokenTypeUnexpectedEndOfSourceExamples + * @test + * @param string $source + * @param TokenTypes 
$tokenTypes + * @param int $numberOfReadOperations upper bound on the number of readOneOf() calls + * @param LexerException $expectedLexerException + * @return void + */ + public function throwsIfSourceEndsUnexpectedlyWhileReadingMultipleTokenTypes( + string $source, + TokenTypes $tokenTypes, + int $numberOfReadOperations, + LexerException $expectedLexerException + ): void { + $this->assertThrowsLexerException( + function () use ($source, $tokenTypes, $numberOfReadOperations) { + $lexer = new Lexer($source); + + // A counted loop instead of range(0, N): range() includes both bounds and would read N + 1 times. + for ($i = 0; $i < $numberOfReadOperations; $i++) { + $lexer->readOneOf($tokenTypes); + } + }, + $expectedLexerException + ); + } + + /** + * @test + */ + public function skipsSpace(): void + { + // Single + $lexer = new Lexer('return ' . "\t\n\t" . 
' 42'); + + $lexer->readOneOf(TokenTypes::from(TokenType::KEYWORD_RETURN, TokenType::INTEGER_DECIMAL)); + $lexer->skipSpace(); + $lexer->readOneOf(TokenTypes::from(TokenType::KEYWORD_RETURN, TokenType::INTEGER_DECIMAL)); + + $this->assertEquals( + new Token( + rangeInSource: self::range([1, 4], [1, 5]), + type: TokenType::INTEGER_DECIMAL, + value: '42' + ), + $lexer->getTokenUnderCursor() + ); + } + + /** + * @test + */ + public function skipsSpaceAndComments(): void + { + $source = <<read(TokenType::KEYWORD_IMPORT); + $lexer->skipSpaceAndComments(); + $lexer->read(TokenType::KEYWORD_EXPORT); + $lexer->skipSpaceAndComments(); + $lexer->read(TokenType::KEYWORD_COMPONENT); + + $this->assertEquals( + new Token( + rangeInSource: self::range([6, 4], [6, 12]), + type: TokenType::KEYWORD_COMPONENT, + value: 'component' + ), + $lexer->getTokenUnderCursor() + ); + + // Multiple + $lexer = new Lexer($source); + $lexer->readOneOf( + TokenTypes::from( + TokenType::KEYWORD_IMPORT, + TokenType::KEYWORD_EXPORT, + TokenType::KEYWORD_COMPONENT + ) + ); + $lexer->skipSpaceAndComments(); + $lexer->readOneOf( + TokenTypes::from( + TokenType::KEYWORD_IMPORT, + TokenType::KEYWORD_EXPORT, + TokenType::KEYWORD_COMPONENT + ) + ); + $lexer->skipSpaceAndComments(); + $lexer->readOneOf( + TokenTypes::from( + TokenType::KEYWORD_IMPORT, + TokenType::KEYWORD_EXPORT, + TokenType::KEYWORD_COMPONENT + ) + ); + + $this->assertEquals( + new Token( + rangeInSource: self::range([6, 4], [6, 12]), + type: TokenType::KEYWORD_COMPONENT, + value: 'component' + ), + $lexer->getTokenUnderCursor() + ); + } + + /** + * @return iterable + */ + public static function illegalOperationsAfterFailureExamples(): iterable + { + yield [fn (Lexer $lexer) => $lexer->read(TokenType::KEYWORD_IMPORT)]; + yield [ + fn (Lexer $lexer) => $lexer->readOneOf( + TokenTypes::from( + TokenType::KEYWORD_IMPORT, + TokenType::KEYWORD_NULL, + TokenType::SYMBOL_ARROW_SINGLE, + TokenType::BRACKET_ANGLE_CLOSE, + ) + ) + ]; + yield [fn 
(Lexer $lexer) => $lexer->skipSpace()]; + yield [fn (Lexer $lexer) => $lexer->skipSpaceAndComments()]; + yield [fn (Lexer $lexer) => $lexer->getTokenUnderCursor()]; + } + + /** + * @dataProvider illegalOperationsAfterFailureExamples + * @test + * @param callable $operation + * @return void + */ + public function cannotBeReusedAfterFailure(callable $operation): void + { + $lexer = new Lexer('import'); + try { + $lexer->read(TokenType::SYMBOL_BOOLEAN_AND); + } catch (LexerException $e) { + } + + $this->expectException(AssertionError::class); + $operation($lexer); + } + + /** + * @test + */ + public function tellsIfItHasEnded(): void + { + $lexer = new Lexer(''); + + $this->assertTrue($lexer->isEnd()); + + $lexer = new Lexer('return null'); + + $this->assertFalse($lexer->isEnd()); + + $lexer->read(TokenType::KEYWORD_RETURN); + + $this->assertFalse($lexer->isEnd()); + + $lexer->read(TokenType::SPACE); + + $this->assertFalse($lexer->isEnd()); + + $lexer->read(TokenType::KEYWORD_NULL); + + $this->assertTrue($lexer->isEnd()); + } +} From 5022652cbc002c8294e496d3a1c5ec32e8b6952c Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Wed, 9 Aug 2023 15:06:04 +0200 Subject: [PATCH 02/19] TASK: Enable zend.assertions in CI --- .github/actions/setup-php/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/setup-php/action.yml b/.github/actions/setup-php/action.yml index 7669875..2a4b3f2 100644 --- a/.github/actions/setup-php/action.yml +++ b/.github/actions/setup-php/action.yml @@ -12,6 +12,7 @@ runs: with: php-version: ${{ inputs.php-version }} coverage: xdebug + ini-values: zend.assertions=1 - id: composer-cache run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT From 4f5b603af8f01b47613c57053394f646cede58d6 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 11 Aug 2023 11:34:09 +0200 Subject: [PATCH 03/19] TASK: Prepare Lexer interface for parser use cases --- .../Lexer/CharacterStream/CharacterStream.php | 24 ++ 
.../CharacterStreamSnapshot.php | 36 ++ src/Language/Lexer/CharacterStream/Cursor.php | 21 ++ .../Lexer/CharacterStream/CursorSnapshot.php | 37 ++ src/Language/Lexer/Lexer.php | 343 +++++++++++++----- src/Language/Lexer/LexerException.php | 37 ++ src/Language/Lexer/Matcher/Matcher.php | 20 +- .../Lexer/Matcher/Optional/Optional.php | 43 +++ src/Language/Lexer/Token/TokenType.php | 7 +- src/Language/Util/DebugHelper.php | 7 +- test/Unit/Language/Lexer/LexerTest.php | 136 ++++--- 11 files changed, 552 insertions(+), 159 deletions(-) create mode 100644 src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php create mode 100644 src/Language/Lexer/CharacterStream/CursorSnapshot.php create mode 100644 src/Language/Lexer/Matcher/Optional/Optional.php diff --git a/src/Language/Lexer/CharacterStream/CharacterStream.php b/src/Language/Lexer/CharacterStream/CharacterStream.php index df8a7c4..cf13037 100644 --- a/src/Language/Lexer/CharacterStream/CharacterStream.php +++ b/src/Language/Lexer/CharacterStream/CharacterStream.php @@ -24,6 +24,9 @@ use PackageFactory\ComponentEngine\Parser\Source\Position; +/** + * @internal + */ final class CharacterStream { private int $byte; @@ -81,4 +84,25 @@ public function getPreviousPosition(): Position { return $this->cursor->getPreviousPosition(); } + + public function makeSnapshot(): CharacterStreamSnapshot + { + return new CharacterStreamSnapshot( + byte: $this->byte, + cursor: $this->cursor->makeSnapshot(), + characterUnderCursor: $this->characterUnderCursor + ); + } + + public function restoreSnapshot(CharacterStreamSnapshot $snapshot): void + { + $this->byte = $snapshot->byte; + $this->cursor->restoreSnapshot($snapshot->cursor); + $this->characterUnderCursor = $snapshot->characterUnderCursor; + } + + public function getRest(): string + { + return $this->characterUnderCursor . 
substr($this->source, $this->byte); + } } diff --git a/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php b/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php new file mode 100644 index 0000000..e101b5f --- /dev/null +++ b/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php @@ -0,0 +1,36 @@ +. + */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream; + +/** + * @internal + */ +final class CharacterStreamSnapshot +{ + public function __construct( + public readonly int $byte, + public readonly CursorSnapshot $cursor, + public readonly ?string $characterUnderCursor = null + ) { + } +} diff --git a/src/Language/Lexer/CharacterStream/Cursor.php b/src/Language/Lexer/CharacterStream/Cursor.php index d2f5c48..f2bf00b 100644 --- a/src/Language/Lexer/CharacterStream/Cursor.php +++ b/src/Language/Lexer/CharacterStream/Cursor.php @@ -24,6 +24,9 @@ use PackageFactory\ComponentEngine\Parser\Source\Position; +/** + * @internal + */ final class Cursor { private int $currentLineNumber = 0; @@ -58,4 +61,22 @@ public function getPreviousPosition(): Position return new Position($this->previousLineNumber, $this->previousColumnNumber); } + + public function makeSnapshot(): CursorSnapshot + { + return new CursorSnapshot( + currentLineNumber: $this->currentLineNumber, + currentColumnNumber: $this->currentColumnNumber, + previousLineNumber: $this->previousLineNumber, + previousColumnNumber: $this->previousColumnNumber + ); + } + + public function restoreSnapshot(CursorSnapshot $snapshot): void + { + $this->currentLineNumber = $snapshot->currentLineNumber; + $this->currentColumnNumber = $snapshot->currentColumnNumber; + $this->previousLineNumber = $snapshot->previousLineNumber; + $this->previousColumnNumber = $snapshot->previousColumnNumber; + } } diff --git a/src/Language/Lexer/CharacterStream/CursorSnapshot.php b/src/Language/Lexer/CharacterStream/CursorSnapshot.php new file mode 100644 index 
0000000..eadc09b --- /dev/null +++ b/src/Language/Lexer/CharacterStream/CursorSnapshot.php @@ -0,0 +1,37 @@ +. + */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream; + +/** + * @internal + */ +final class CursorSnapshot +{ + public function __construct( + public readonly int $currentLineNumber, + public readonly int $currentColumnNumber, + public readonly int $previousLineNumber, + public readonly int $previousColumnNumber + ) { + } +} diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index 8993183..3a83ff1 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -22,6 +22,7 @@ namespace PackageFactory\ComponentEngine\Language\Lexer; +use LogicException; use PackageFactory\ComponentEngine\Language\Lexer\CharacterStream\CharacterStream; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; @@ -33,135 +34,253 @@ final class Lexer { + private readonly TokenTypes $TOKEN_TYPES_SPACE; + private readonly TokenTypes $TOKEN_TYPES_SPACE_AND_COMMENTS; + private readonly CharacterStream $characterStream; - private ?Position $startPosition = null; + private Position $startPosition; private int $offset = 0; private string $buffer = ''; private ?TokenType $tokenTypeUnderCursor = null; private ?Token $tokenUnderCursor = null; - private ?LexerException $latestError = null; public function __construct(string $source) { + $this->TOKEN_TYPES_SPACE = TokenTypes::from( + TokenType::SPACE, + TokenType::END_OF_LINE + ); + $this->TOKEN_TYPES_SPACE_AND_COMMENTS = TokenTypes::from( + TokenType::SPACE, + TokenType::END_OF_LINE, + TokenType::COMMENT + ); + $this->characterStream = new CharacterStream($source); + $this->startPosition = Position::zero(); } - public function read(TokenType $tokenType): void + public function getTokenTypeUnderCursor(): TokenType { - assert($this->latestError === null); - 
$this->startPosition = $this->characterStream->getCurrentPosition(); + assert($this->tokenTypeUnderCursor !== null); - if ($this->characterStream->isEnd()) { - throw $this->latestError = LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: TokenTypes::from($tokenType), - affectedRangeInSource: $this->startPosition->toRange() + return $this->tokenTypeUnderCursor; + } + + public function getTokenUnderCursor(): Token + { + return $this->tokenUnderCursor ??= new Token( + rangeInSource: Range::from($this->startPosition, $this->getEndPosition()), + type: $this->getTokenTypeUnderCursor(), + value: $this->buffer + ); + } + + public function isEnd(): bool + { + return $this->characterStream->isEnd(); + } + + public function assertIsEnd(): void + { + if (!$this->isEnd()) { + throw LexerException::becauseOfUnexpectedExceedingSource( + affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange(), + exceedingCharacter: $this->characterStream->current() ?? '' ); } + } - $this->tokenTypeUnderCursor = null; - $this->tokenUnderCursor = null; - $this->offset = 0; - $this->buffer = ''; + public function getStartPosition(): Position + { - while (true) { - $character = $this->characterStream->current(); - $result = Matcher::for($tokenType)->match($character, $this->offset); + return $this->startPosition; + } - if ($result === Result::KEEP) { - $this->offset++; - $this->buffer .= $character; - $this->characterStream->next(); - continue; - } + public function getEndPosition(): Position + { - if ($result === Result::SATISFIED) { - $this->tokenTypeUnderCursor = $tokenType; - break; - } + return $this->characterStream->getPreviousPosition(); + } - if ($result === Result::CANCEL) { - throw $this->latestError = LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: TokenTypes::from($tokenType), - affectedRangeInSource: Range::from( - $this->startPosition, - $this->characterStream->getCurrentPosition() - ), - actualCharacterSequence: 
$this->buffer . $character - ); - } + public function read(TokenType $tokenType): void + { + + if ($this->characterStream->isEnd()) { + throw LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: TokenTypes::from($tokenType), + affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() + ); + } + + if ($this->extract($tokenType)) { + $this->tokenTypeUnderCursor = $tokenType; + return; } + + throw LexerException::becauseOfUnexpectedCharacterSequence( + expectedTokenTypes: TokenTypes::from($tokenType), + affectedRangeInSource: Range::from( + $this->startPosition, + $this->characterStream->getCurrentPosition() + ), + actualCharacterSequence: $this->buffer . $this->characterStream->current() + ); } public function readOneOf(TokenTypes $tokenTypes): void { - assert($this->latestError === null); - $this->startPosition = $this->characterStream->getCurrentPosition(); if ($this->characterStream->isEnd()) { - throw $this->latestError = LexerException::becauseOfUnexpectedEndOfSource( + throw LexerException::becauseOfUnexpectedEndOfSource( expectedTokenTypes: $tokenTypes, - affectedRangeInSource: $this->startPosition->toRange() + affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() ); } - $this->tokenTypeUnderCursor = null; - $this->tokenUnderCursor = null; - $this->offset = 0; - $this->buffer = ''; + $foundTokenType = $this->extractOneOf($tokenTypes); + if ($foundTokenType === null) { + throw LexerException::becauseOfUnexpectedCharacterSequence( + expectedTokenTypes: $tokenTypes, + affectedRangeInSource: Range::from( + $this->startPosition, + $this->characterStream->getPreviousPosition() + ), + actualCharacterSequence: $this->buffer + ); + } - $tokenTypeCandidates = $tokenTypes->items; - while (count($tokenTypeCandidates)) { - $character = $this->characterStream->current(); + $this->tokenTypeUnderCursor = $foundTokenType; + } - $nextTokenTypeCandidates = []; - foreach ($tokenTypeCandidates as $tokenType) { - 
$result = Matcher::for($tokenType)->match($character, $this->offset); + public function probe(TokenType $tokenType): bool + { - if ($result === Result::KEEP) { - $nextTokenTypeCandidates[] = $tokenType; - continue; - } + if ($this->characterStream->isEnd()) { + return false; + } - if ($result === Result::SATISFIED) { - $this->tokenTypeUnderCursor = $tokenType; - return; - } - } + $snapshot = $this->characterStream->makeSnapshot(); - $this->offset++; - $this->buffer .= $character; - $tokenTypeCandidates = $nextTokenTypeCandidates; - $this->characterStream->next(); + if ($tokenType = $this->extract($tokenType)) { + $this->tokenTypeUnderCursor = $tokenType; + return true; } - throw $this->latestError = LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: $tokenTypes, - affectedRangeInSource: Range::from( - $this->startPosition, - $this->characterStream->getPreviousPosition() - ), - actualCharacterSequence: $this->buffer - ); + $this->characterStream->restoreSnapshot($snapshot); + return false; + } + + public function probeOneOf(TokenTypes $tokenTypes): bool + { + if ($this->characterStream->isEnd()) { + return false; + } + + $snapshot = $this->characterStream->makeSnapshot(); + + if ($tokenType = $this->extractOneOf($tokenTypes)) { + $this->tokenTypeUnderCursor = $tokenType; + return true; + } + + $this->characterStream->restoreSnapshot($snapshot); + return false; + } + + public function peek(TokenType $tokenType): bool + { + if ($this->characterStream->isEnd()) { + return false; + } + + $snapshot = $this->characterStream->makeSnapshot(); + $result = $this->extract($tokenType) !== null; + $this->characterStream->restoreSnapshot($snapshot); + + return $result; + } + + public function peekOneOf(TokenTypes $tokenTypes): ?TokenType + { + if ($this->characterStream->isEnd()) { + return null; + } + + $snapshot = $this->characterStream->makeSnapshot(); + $foundTokenType = $this->extractOneOf($tokenTypes); + 
$this->characterStream->restoreSnapshot($snapshot); + + return $foundTokenType; + } + + public function expect(TokenType $tokenType): void + { + if ($this->characterStream->isEnd()) { + throw LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: TokenTypes::from($tokenType), + affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() + ); + } + + $snapshot = $this->characterStream->makeSnapshot(); + if ($this->extract($tokenType) === null) { + throw LexerException::becauseOfUnexpectedCharacterSequence( + expectedTokenTypes: TokenTypes::from($tokenType), + affectedRangeInSource: Range::from( + $this->startPosition, + $this->characterStream->getPreviousPosition() + ), + actualCharacterSequence: $this->buffer + ); + } + + $this->characterStream->restoreSnapshot($snapshot); + } + + public function expectOneOf(TokenTypes $tokenTypes): TokenType + { + if ($this->characterStream->isEnd()) { + throw LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: $tokenTypes, + affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() + ); + } + + $snapshot = $this->characterStream->makeSnapshot(); + $foundTokenType = $this->extractOneOf($tokenTypes); + if ($foundTokenType === null) { + throw LexerException::becauseOfUnexpectedCharacterSequence( + expectedTokenTypes: $tokenTypes, + affectedRangeInSource: Range::from( + $this->startPosition, + $this->characterStream->getPreviousPosition() + ), + actualCharacterSequence: $this->buffer + ); + } + + $this->characterStream->restoreSnapshot($snapshot); + + return $foundTokenType; } public function skipSpace(): void { - assert($this->latestError === null); - $this->skip(TokenType::SPACE, TokenType::END_OF_LINE); + $this->skipAnyOf($this->TOKEN_TYPES_SPACE); } public function skipSpaceAndComments(): void { - assert($this->latestError === null); - $this->skip(TokenType::SPACE, TokenType::END_OF_LINE, TokenType::COMMENT); + 
$this->skipAnyOf($this->TOKEN_TYPES_SPACE_AND_COMMENTS); } - private function skip(TokenType ...$tokenTypes): void + private function skipAnyOf(TokenTypes $tokenTypes): void { while (true) { $character = $this->characterStream->current(); - foreach ($tokenTypes as $tokenType) { + foreach ($tokenTypes->items as $tokenType) { $matcher = Matcher::for($tokenType); if ($matcher->match($character, 0) === Result::KEEP) { @@ -174,24 +293,66 @@ private function skip(TokenType ...$tokenTypes): void } } - public function getTokenUnderCursor(): Token + private function extract(TokenType $tokenType): ?TokenType { - assert($this->latestError === null); - assert($this->startPosition !== null); - assert($this->tokenTypeUnderCursor !== null); + $this->startPosition = $this->characterStream->getCurrentPosition(); + $this->tokenUnderCursor = null; + $this->offset = 0; + $this->buffer = ''; - return $this->tokenUnderCursor ??= new Token( - rangeInSource: Range::from( - $this->startPosition, - $this->characterStream->getPreviousPosition() - ), - type: $this->tokenTypeUnderCursor, - value: $this->buffer - ); + while (true) { + $character = $this->characterStream->current(); + $result = Matcher::for($tokenType)->match($character, $this->offset); + + if ($result === Result::SATISFIED) { + return $tokenType; + } + + if ($result === Result::CANCEL) { + return null; + } + + $this->offset++; + $this->buffer .= $character; + $this->characterStream->next(); + } } - public function isEnd(): bool + private function extractOneOf(TokenTypes $tokenTypes): ?TokenType { - return $this->characterStream->isEnd(); + $this->startPosition = $this->characterStream->getCurrentPosition(); + $this->tokenUnderCursor = null; + $this->offset = 0; + $this->buffer = ''; + + $tokenTypeCandidates = $tokenTypes->items; + while (count($tokenTypeCandidates)) { + $character = $this->characterStream->current(); + + $nextTokenTypeCandidates = []; + foreach ($tokenTypeCandidates as $tokenType) { + $result = 
Matcher::for($tokenType)->match($character, $this->offset); + + if ($result === Result::SATISFIED) { + return $tokenType; + } + + if ($result === Result::KEEP) { + $nextTokenTypeCandidates[] = $tokenType; + } + } + + $this->offset++; + $this->buffer .= $character; + $tokenTypeCandidates = $nextTokenTypeCandidates; + $this->characterStream->next(); + } + + return null; + } + + public function dumpRest(): string + { + return $this->characterStream->getRest(); } } diff --git a/src/Language/Lexer/LexerException.php b/src/Language/Lexer/LexerException.php index 85e823f..99a0bbe 100644 --- a/src/Language/Lexer/LexerException.php +++ b/src/Language/Lexer/LexerException.php @@ -22,6 +22,7 @@ namespace PackageFactory\ComponentEngine\Language\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Util\DebugHelper; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -33,6 +34,13 @@ private function __construct( string $message, public readonly Range $affectedRangeInSource ) { + $message = sprintf( + '[%s:%s] %s', + $affectedRangeInSource->start->lineNumber, + $affectedRangeInSource->start->columnNumber, + $message + ); + parent::__construct($message, $code); } @@ -65,4 +73,33 @@ public static function becauseOfUnexpectedCharacterSequence( affectedRangeInSource: $affectedRangeInSource ); } + + public static function becauseOfUnexpectedToken( + TokenTypes $expectedTokenTypes, + Token $actualToken + ): self { + return new self( + code: 1691575769, + message: sprintf( + 'Unexpected token "%s" was encountered. 
Expected %s instead.', + DebugHelper::describeToken($actualToken), + DebugHelper::describeTokenTypes($expectedTokenTypes) + ), + affectedRangeInSource: $actualToken->rangeInSource + ); + } + + public static function becauseOfUnexpectedExceedingSource( + Range $affectedRangeInSource, + string $exceedingCharacter + ): self { + return new self( + code: 1691675396, + message: sprintf( + 'Expected source to end, but found exceeding character "%s".', + $exceedingCharacter + ), + affectedRangeInSource: $affectedRangeInSource + ); + } } diff --git a/src/Language/Lexer/Matcher/Matcher.php b/src/Language/Lexer/Matcher/Matcher.php index af6249b..88e8d69 100644 --- a/src/Language/Lexer/Matcher/Matcher.php +++ b/src/Language/Lexer/Matcher/Matcher.php @@ -26,6 +26,7 @@ use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Exact\Exact; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Fixed\Fixed; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Not\Not; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Optional\Optional; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Sequence\Sequence; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; @@ -40,7 +41,10 @@ final public static function for(TokenType $tokenType): self { return self::$instancesByTokenType[$tokenType->value] ??= match ($tokenType) { TokenType::COMMENT => - new Sequence(new Exact('#'), new Not(new Exact("\n"))), + new Sequence( + new Exact('#'), + new Optional(new Not(new Exact("\n"))) + ), TokenType::KEYWORD_FROM => new Exact('from'), @@ -70,7 +74,7 @@ final public static function for(TokenType $tokenType): self TokenType::STRING_LITERAL_DELIMITER => new Exact('"'), TokenType::STRING_LITERAL_CONTENT => - new Not(new Characters('"\\' . 
"\n")), + new Not(new Characters('"\\')), TokenType::INTEGER_BINARY => new Sequence(new Exact('0b'), new Characters('01')), @@ -147,12 +151,16 @@ final public static function for(TokenType $tokenType): self new Exact('&&'), TokenType::SYMBOL_BOOLEAN_OR => new Exact('||'), - TokenType::SYMBOL_STRICT_EQUALs => + TokenType::SYMBOL_STRICT_EQUALS => new Exact('==='), - TokenType::SYMBOL_NOT_EQUALs => + TokenType::SYMBOL_NOT_EQUALS => new Exact('!=='), + TokenType::SYMBOL_GREATER_THAN => + new Exact('>'), TokenType::SYMBOL_GREATER_THAN_OR_EQUAL => new Exact('>='), + TokenType::SYMBOL_LESS_THAN => + new Exact('<'), TokenType::SYMBOL_LESS_THAN_OR_EQUAL => new Exact('<='), TokenType::SYMBOL_ARROW_SINGLE => @@ -161,13 +169,15 @@ final public static function for(TokenType $tokenType): self new Exact('?.'), TokenType::SYMBOL_NULLISH_COALESCE => new Exact('??'), + TokenType::SYMBOL_CLOSE_TAG => + new Exact(' new Characters( 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' ), TokenType::TEXT => - new Not(new Characters('<{}>')), + new Not(new Characters('<{}>' . " \t\n")), TokenType::SPACE => new Characters(" \t"), diff --git a/src/Language/Lexer/Matcher/Optional/Optional.php b/src/Language/Lexer/Matcher/Optional/Optional.php new file mode 100644 index 0000000..20de392 --- /dev/null +++ b/src/Language/Lexer/Matcher/Optional/Optional.php @@ -0,0 +1,43 @@ +. 
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\Matcher\Optional; + +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; + +final class Optional extends Matcher +{ + public function __construct(private readonly Matcher $innerMatcher) + { + } + + public function match(?string $character, int $offset): Result + { + $result = $this->innerMatcher->match($character, $offset); + if ($offset === 0 && $result === Result::CANCEL) { + return Result::SATISFIED; + } + + return $result; + } +} diff --git a/src/Language/Lexer/Token/TokenType.php b/src/Language/Lexer/Token/TokenType.php index 93bd0c3..3fdbd7e 100644 --- a/src/Language/Lexer/Token/TokenType.php +++ b/src/Language/Lexer/Token/TokenType.php @@ -75,13 +75,16 @@ enum TokenType: string case SYMBOL_PIPE = 'SYMBOL_PIPE'; case SYMBOL_BOOLEAN_AND = 'SYMBOL_BOOLEAN_AND'; case SYMBOL_BOOLEAN_OR = 'SYMBOL_BOOLEAN_OR'; - case SYMBOL_STRICT_EQUALs = 'SYMBOL_STRICT_EQUALs'; - case SYMBOL_NOT_EQUALs = 'SYMBOL_NOT_EQUALs'; + case SYMBOL_STRICT_EQUALS = 'SYMBOL_STRICT_EQUALS'; + case SYMBOL_NOT_EQUALS = 'SYMBOL_NOT_EQUALS'; + case SYMBOL_GREATER_THAN = 'SYMBOL_GREATER_THAN'; case SYMBOL_GREATER_THAN_OR_EQUAL = 'SYMBOL_GREATER_THAN_OR_EQUAL'; + case SYMBOL_LESS_THAN = 'SYMBOL_LESS_THAN'; case SYMBOL_LESS_THAN_OR_EQUAL = 'SYMBOL_LESS_THAN_OR_EQUAL'; case SYMBOL_ARROW_SINGLE = 'SYMBOL_ARROW_SINGLE'; case SYMBOL_OPTCHAIN = 'SYMBOL_OPTCHAIN'; case SYMBOL_NULLISH_COALESCE = 'SYMBOL_NULLISH_COALESCE'; + case SYMBOL_CLOSE_TAG = 'SYMBOL_CLOSE_TAG'; case WORD = 'WORD'; case TEXT = 'TEXT'; diff --git a/src/Language/Util/DebugHelper.php b/src/Language/Util/DebugHelper.php index 154c98e..6382e86 100644 --- a/src/Language/Util/DebugHelper.php +++ b/src/Language/Util/DebugHelper.php @@ -82,13 +82,16 @@ public static function describeTokenType(TokenType $tokenType): string TokenType::SYMBOL_PIPE => ' ("|")', 
TokenType::SYMBOL_BOOLEAN_AND => ' ("&&")', TokenType::SYMBOL_BOOLEAN_OR => ' ("||")', - TokenType::SYMBOL_STRICT_EQUALs => ' ("===")', - TokenType::SYMBOL_NOT_EQUALs => ' ("!==")', + TokenType::SYMBOL_STRICT_EQUALS => ' ("===")', + TokenType::SYMBOL_NOT_EQUALS => ' ("!==")', + TokenType::SYMBOL_GREATER_THAN => ' (">")', TokenType::SYMBOL_GREATER_THAN_OR_EQUAL => ' (">=")', + TokenType::SYMBOL_LESS_THAN => ' ("<")', TokenType::SYMBOL_LESS_THAN_OR_EQUAL => ' ("<=")', TokenType::SYMBOL_ARROW_SINGLE => ' ("->")', TokenType::SYMBOL_OPTCHAIN => ' ("?.")', TokenType::SYMBOL_NULLISH_COALESCE => ' ("??")', + TokenType::SYMBOL_CLOSE_TAG => ' (" '', TokenType::TEXT => '', diff --git a/test/Unit/Language/Lexer/LexerTest.php b/test/Unit/Language/Lexer/LexerTest.php index 9cb8a03..da12dfc 100644 --- a/test/Unit/Language/Lexer/LexerTest.php +++ b/test/Unit/Language/Lexer/LexerTest.php @@ -22,7 +22,6 @@ namespace PackageFactory\ComponentEngine\Test\Unit\Language\Lexer; -use AssertionError; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; @@ -64,6 +63,8 @@ protected function assertThrowsLexerException(callable $fn, LexerException $expe */ public static function singleTokenExamples(): iterable { + yield ($source = '#') => + [$source, TokenType::COMMENT]; yield ($source = '# This is a comment') => [$source, TokenType::COMMENT]; yield ($source = '# 🌵🆚⌚️: Multi-byte characters are not a problem inside a comment.') => @@ -170,9 +171,9 @@ public static function singleTokenExamples(): iterable yield ($source = '||') => [$source, TokenType::SYMBOL_BOOLEAN_OR]; yield ($source = '===') => - [$source, TokenType::SYMBOL_STRICT_EQUALs]; + [$source, TokenType::SYMBOL_STRICT_EQUALS]; yield ($source = '!==') => - [$source, TokenType::SYMBOL_NOT_EQUALs]; + [$source, TokenType::SYMBOL_NOT_EQUALS]; yield ($source = '>=') => [$source, 
TokenType::SYMBOL_GREATER_THAN_OR_EQUAL]; yield ($source = '<=') => @@ -191,9 +192,9 @@ public static function singleTokenExamples(): iterable yield ($source = '1245ValidWord') => [$source, TokenType::WORD]; - yield ($source = 'Just some text. Nothing special.') => + yield ($source = 'JustSomeText.TextTerminates-Only-At??Space//Characters.') => [$source, TokenType::TEXT]; - yield ($source = '🌵🆚⌚️: Multi-byte characters are not a problem inside of text.') => + yield ($source = '🌵🆚⌚️') => [$source, TokenType::TEXT]; yield ($source = ' ') => @@ -218,11 +219,63 @@ public static function singleTokenExamples(): iterable * @param TokenType $expectedTokenType * @return void */ - public function readsSingleToken(string $source, TokenType $expectedTokenType): void + public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, TokenType $expectedTokenType): void { $lexer = new Lexer($source); $lexer->read($expectedTokenType); + $this->assertEquals( + $expectedTokenType, + $lexer->getTokenTypeUnderCursor() + ); + + $this->assertEquals( + new Position(0, 0), + $lexer->getStartPosition() + ); + + $this->assertEquals( + new Position(0, \mb_strlen($source) - 1), + $lexer->getEndPosition() + ); + + $this->assertEquals( + new Token( + rangeInSource: self::range([0, 0], [0, \mb_strlen($source) - 1]), + type: $expectedTokenType, + value: $source + ), + $lexer->getTokenUnderCursor() + ); + } + + /** + * @dataProvider singleTokenExamples + * @test + * @param string $source + * @param TokenType $expectedTokenType + * @return void + */ + public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, TokenType $expectedTokenType): void + { + $lexer = new Lexer($source); + $lexer->readOneOf(TokenTypes::from($expectedTokenType)); + + $this->assertEquals( + $expectedTokenType, + $lexer->getTokenTypeUnderCursor() + ); + + $this->assertEquals( + new Position(0, 0), + $lexer->getStartPosition() + ); + + $this->assertEquals( + new Position(0, \mb_strlen($source) - 1), + 
$lexer->getEndPosition() + ); + $this->assertEquals( new Token( rangeInSource: self::range([0, 0], [0, \mb_strlen($source) - 1]), @@ -422,8 +475,8 @@ public static function multipleTokensExamples(): iterable ]; $source = << inside. + ThisIsSomeText-with-expressions{} + line-breaks, spaces andTags<>inside. AFX; yield $source => [ $source, @@ -431,18 +484,23 @@ public static function multipleTokensExamples(): iterable TokenType::TEXT, TokenType::BRACKET_CURLY_OPEN, TokenType::BRACKET_CURLY_CLOSE, + TokenType::SPACE, TokenType::END_OF_LINE, TokenType::BRACKET_ANGLE_OPEN, TokenType::BRACKET_ANGLE_CLOSE ), - new Token(self::range([0, 0], [0, 34]), TokenType::TEXT, 'This is some text with expressions '), - new Token(self::range([0, 35], [0, 35]), TokenType::BRACKET_CURLY_OPEN, '{'), - new Token(self::range([0, 36], [0, 36]), TokenType::BRACKET_CURLY_CLOSE, '}'), - new Token(self::range([0, 37], [0, 37]), TokenType::END_OF_LINE, "\n"), - new Token(self::range([1, 0], [1, 8]), TokenType::TEXT, 'and tags '), - new Token(self::range([1, 9], [1, 9]), TokenType::BRACKET_ANGLE_OPEN, '<'), - new Token(self::range([1, 10], [1, 10]), TokenType::BRACKET_ANGLE_CLOSE, '>'), - new Token(self::range([1, 11], [1, 18]), TokenType::TEXT, ' inside.'), + new Token(self::range([0, 0], [0, 30]), TokenType::TEXT, 'ThisIsSomeText-with-expressions'), + new Token(self::range([0, 31], [0, 31]), TokenType::BRACKET_CURLY_OPEN, '{'), + new Token(self::range([0, 32], [0, 32]), TokenType::BRACKET_CURLY_CLOSE, '}'), + new Token(self::range([0, 33], [0, 33]), TokenType::END_OF_LINE, "\n"), + new Token(self::range([1, 0], [1, 11]), TokenType::TEXT, 'line-breaks,'), + new Token(self::range([1, 12], [1, 14]), TokenType::SPACE, ' '), + new Token(self::range([1, 15], [1, 20]), TokenType::TEXT, 'spaces'), + new Token(self::range([1, 21], [1, 23]), TokenType::SPACE, ' '), + new Token(self::range([1, 24], [1, 30]), TokenType::TEXT, 'andTags'), + new Token(self::range([1, 31], [1, 31]), 
TokenType::BRACKET_ANGLE_OPEN, '<'), + new Token(self::range([1, 32], [1, 32]), TokenType::BRACKET_ANGLE_CLOSE, '>'), + new Token(self::range([1, 33], [1, 39]), TokenType::TEXT, 'inside.'), ]; } @@ -453,7 +511,7 @@ public static function multipleTokensExamples(): iterable * @param Token ...$expectedTokens * @return void */ - public function readsMultipleTokens( + public function testReadOneOfWithMultipleTokenTypes( string $source, TokenTypes $tokenTypes, Token ...$expectedTokens @@ -504,7 +562,6 @@ public static function failingSingleTokenExamples(): iterable yield from $example(TokenType::STRING_LITERAL_DELIMITER, '\'', '\''); yield from $example(TokenType::STRING_LITERAL_CONTENT, '"', '"'); - yield from $example(TokenType::STRING_LITERAL_CONTENT, "\n", "\n"); yield from $example(TokenType::STRING_LITERAL_CONTENT, '\\', '\\'); yield from $example(TokenType::INTEGER_BINARY, '001001', '00'); @@ -564,8 +621,8 @@ public static function failingSingleTokenExamples(): iterable yield from $example(TokenType::SYMBOL_PIPE, '🌵', '🌵'); yield from $example(TokenType::SYMBOL_BOOLEAN_AND, '§§', '§'); yield from $example(TokenType::SYMBOL_BOOLEAN_OR, '//', '/'); - yield from $example(TokenType::SYMBOL_STRICT_EQUALs, '!==', '!'); - yield from $example(TokenType::SYMBOL_NOT_EQUALs, '===', '='); + yield from $example(TokenType::SYMBOL_STRICT_EQUALS, '!==', '!'); + yield from $example(TokenType::SYMBOL_NOT_EQUALS, '===', '='); yield from $example(TokenType::SYMBOL_GREATER_THAN_OR_EQUAL, '=>', '='); yield from $example(TokenType::SYMBOL_LESS_THAN_OR_EQUAL, '=<', '='); yield from $example(TokenType::SYMBOL_ARROW_SINGLE, '=>', '='); @@ -875,45 +932,6 @@ public function skipsSpaceAndComments(): void ); } - /** - * @return iterable - */ - public static function illegalOperationsAfterFailureExamples(): iterable - { - yield [fn (Lexer $lexer) => $lexer->read(TokenType::KEYWORD_IMPORT)]; - yield [ - fn (Lexer $lexer) => $lexer->readOneOf( - TokenTypes::from( - TokenType::KEYWORD_IMPORT, - 
TokenType::KEYWORD_NULL, - TokenType::SYMBOL_ARROW_SINGLE, - TokenType::BRACKET_ANGLE_CLOSE, - ) - ) - ]; - yield [fn (Lexer $lexer) => $lexer->skipSpace()]; - yield [fn (Lexer $lexer) => $lexer->skipSpaceAndComments()]; - yield [fn (Lexer $lexer) => $lexer->getTokenUnderCursor()]; - } - - /** - * @dataProvider illegalOperationsAfterFailureExamples - * @test - * @param callable $operation - * @return void - */ - public function cannotBeReusedAfterFailure(callable $operation): void - { - $lexer = new Lexer('import'); - try { - $lexer->read(TokenType::SYMBOL_BOOLEAN_AND); - } catch (LexerException $e) { - } - - $this->expectException(AssertionError::class); - $operation($lexer); - } - /** * @test */ From dd1625d991cd10e6c96d745a9fd3c7b381e43a8d Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 11 Aug 2023 13:05:12 +0200 Subject: [PATCH 04/19] TASK: Reform all parsers to use new Lexer --- .../Node/BinaryOperation/BinaryOperator.php | 2 + .../TemplateLiteral/TemplateLiteralLine.php | 32 ++ .../TemplateLiteral/TemplateLiteralLines.php | 36 ++ .../TemplateLiteral/TemplateLiteralNode.php | 3 +- .../BooleanLiteral/BooleanLiteralParser.php | 30 +- .../ComponentDeclarationParser.php | 143 ++--- .../EnumDeclaration/EnumDeclarationParser.php | 228 +++----- .../Parser/Export/ExportCouldNotBeParsed.php | 18 +- src/Language/Parser/Export/ExportParser.php | 99 ++-- .../Expression/ExpressionCouldNotBeParsed.php | 12 +- .../Parser/Expression/ExpressionParser.php | 502 +++++++---------- src/Language/Parser/Expression/Precedence.php | 31 +- .../Parser/Import/ImportCouldNotBeParsed.php | 7 +- src/Language/Parser/Import/ImportParser.php | 149 +++-- .../IntegerLiteralCouldNotBeParsed.php | 26 +- .../IntegerLiteral/IntegerLiteralParser.php | 73 +-- src/Language/Parser/Match/MatchParser.php | 219 ++------ .../Parser/Module/ModuleCouldNotBeParsed.php | 13 +- src/Language/Parser/Module/ModuleParser.php | 77 +-- .../Parser/NullLiteral/NullLiteralParser.php | 20 +- 
src/Language/Parser/ParserException.php | 15 +- .../PropertyDeclarationParser.php | 29 +- .../StringLiteral/StringLiteralParser.php | 35 +- .../StructDeclarationParser.php | 105 +--- src/Language/Parser/Tag/TagParser.php | 314 ++++------- .../TemplateLiteral/TemplateLiteralParser.php | 120 ++-- src/Language/Parser/Text/TextParser.php | 163 ++---- .../TypeReference/TypeReferenceParser.php | 102 +--- .../ValueReference/ValueReferenceParser.php | 20 +- src/Language/Util/DebugHelper.php | 87 +++ .../Loader/ModuleFile/ModuleFileLoader.php | 7 +- .../BinaryOperationTranspiler.php | 1 + .../TemplateLiteralTranspiler.php | 32 +- .../BinaryOperationTypeResolver.php | 1 + test/Integration/Examples/Numbers/Numbers.afx | 2 +- .../TemplateLiteral/TemplateLiteral.afx | 16 +- .../TemplateLiteral/TemplateLiteral.php | 2 +- .../PhpTranspilerIntegrationTest.php | 6 +- test/Unit/Language/ASTNodeFixtures.php | 64 +-- .../BooleanLiteralParserTest.php | 9 +- .../ComponentDeclarationParserTest.php | 17 +- .../EnumDeclarationParserTest.php | 53 +- .../Parser/Export/ExportParserTest.php | 44 +- .../Expression/ExpressionParserTest.php | 259 +++++---- .../Parser/Import/ImportParserTest.php | 23 +- .../IntegerLiteralParserTest.php | 66 ++- .../Language/Parser/Match/MatchParserTest.php | 55 +- .../Parser/Module/ModuleParserTest.php | 47 +- .../NullLiteral/NullLiteralParserTest.php | 5 +- .../PropertyDeclarationParserTest.php | 17 +- .../StringLiteral/StringLiteralParserTest.php | 26 +- .../StructDeclarationParserTest.php | 17 +- .../Language/Parser/Tag/TagParserTest.php | 147 ++--- .../TemplateLiteralParserTest.php | 528 ++++++++++++------ .../Language/Parser/Text/TextParserTest.php | 73 +-- .../TypeReference/TypeReferenceParserTest.php | 25 +- .../ValueReferenceParserTest.php | 5 +- .../IntegerLiteralTranspilerTest.php | 1 - .../TemplateLiteralTranspilerTest.php | 55 +- .../Expression/ExpressionTypeResolverTest.php | 37 +- .../TemplateLiteralTypeResolverTest.php | 37 +- 61 files changed, 2056 
insertions(+), 2331 deletions(-) create mode 100644 src/Language/AST/Node/TemplateLiteral/TemplateLiteralLine.php create mode 100644 src/Language/AST/Node/TemplateLiteral/TemplateLiteralLines.php diff --git a/src/Language/AST/Node/BinaryOperation/BinaryOperator.php b/src/Language/AST/Node/BinaryOperation/BinaryOperator.php index 128c253..baf27f2 100644 --- a/src/Language/AST/Node/BinaryOperation/BinaryOperator.php +++ b/src/Language/AST/Node/BinaryOperation/BinaryOperator.php @@ -24,6 +24,8 @@ enum BinaryOperator: string { + case NULLISH_COALESCE = 'NULLISH_COALESCE'; + case AND = 'AND'; case OR = 'OR'; diff --git a/src/Language/AST/Node/TemplateLiteral/TemplateLiteralLine.php b/src/Language/AST/Node/TemplateLiteral/TemplateLiteralLine.php new file mode 100644 index 0000000..ab4363f --- /dev/null +++ b/src/Language/AST/Node/TemplateLiteral/TemplateLiteralLine.php @@ -0,0 +1,32 @@ +. + */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral; + +final class TemplateLiteralLine +{ + public function __construct( + public readonly int $indentation, + public readonly TemplateLiteralSegments $segments + ) { + } +} diff --git a/src/Language/AST/Node/TemplateLiteral/TemplateLiteralLines.php b/src/Language/AST/Node/TemplateLiteral/TemplateLiteralLines.php new file mode 100644 index 0000000..65e6498 --- /dev/null +++ b/src/Language/AST/Node/TemplateLiteral/TemplateLiteralLines.php @@ -0,0 +1,36 @@ +. 
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral; + +final class TemplateLiteralLines +{ + /** + * @var TemplateLiteralLine[] + */ + public readonly array $items; + + public function __construct(TemplateLiteralLine ...$items) + { + $this->items = $items; + } +} diff --git a/src/Language/AST/Node/TemplateLiteral/TemplateLiteralNode.php b/src/Language/AST/Node/TemplateLiteral/TemplateLiteralNode.php index 1fbbabc..1d17ce9 100644 --- a/src/Language/AST/Node/TemplateLiteral/TemplateLiteralNode.php +++ b/src/Language/AST/Node/TemplateLiteral/TemplateLiteralNode.php @@ -29,7 +29,8 @@ final class TemplateLiteralNode extends Node { public function __construct( public readonly Range $rangeInSource, - public readonly TemplateLiteralSegments $segments + public readonly int $indentation, + public readonly TemplateLiteralLines $lines ) { } } diff --git a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php index f353929..de40bbf 100644 --- a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php +++ b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php @@ -24,29 +24,33 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\BooleanLiteral\BooleanLiteralNode; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; final class BooleanLiteralParser { use Singleton; - /** - * @param \Iterator $tokens - * @return BooleanLiteralNode - */ - public function parse(\Iterator &$tokens): BooleanLiteralNode + private static TokenTypes 
$TOKEN_TYPES_BOOLEAN_KEYWORDS; + + private function __construct() + { + self::$TOKEN_TYPES_BOOLEAN_KEYWORDS ??= TokenTypes::from( + TokenType::KEYWORD_TRUE, + TokenType::KEYWORD_FALSE + ); + } + + public function parse(Lexer $lexer): BooleanLiteralNode { - Scanner::assertType($tokens, TokenType::KEYWORD_TRUE, TokenType::KEYWORD_FALSE); + $lexer->readOneOf(self::$TOKEN_TYPES_BOOLEAN_KEYWORDS); - $token = $tokens->current(); + $token = $lexer->getTokenUnderCursor(); $value = $token->type === TokenType::KEYWORD_TRUE; - Scanner::skipOne($tokens); - return new BooleanLiteralNode( - rangeInSource: $token->boundaries, + rangeInSource: $token->rangeInSource, value: $value ); } diff --git a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php index 2a3f176..ccdd11e 100644 --- a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php +++ b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php @@ -28,150 +28,89 @@ use PackageFactory\ComponentEngine\Language\AST\Node\ComponentDeclaration\ComponentNameNode; use PackageFactory\ComponentEngine\Language\AST\Node\Expression\ExpressionNode; use PackageFactory\ComponentEngine\Language\AST\Node\PropertyDeclaration\PropertyDeclarationNodes; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Language\Parser\PropertyDeclaration\PropertyDeclarationParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class ComponentDeclarationParser { use Singleton; + private 
static TokenTypes $TOKEN_TYPES_SPACE; + private ?PropertyDeclarationParser $propertyDeclarationParser = null; private ?ExpressionParser $returnParser = null; - /** - * @param \Iterator $tokens - * @return ComponentDeclarationNode - */ - public function parse(\Iterator &$tokens): ComponentDeclarationNode + private function __construct() { - $componentKeywordToken = $this->extractComponentKeywordToken($tokens); - $name = $this->parseName($tokens); - - $this->skipOpeningBracketToken($tokens); + self::$TOKEN_TYPES_SPACE ??= TokenTypes::from( + TokenType::SPACE, + TokenType::END_OF_LINE + ); + } - $props = $this->parseProps($tokens); + public function parse(Lexer $lexer): ComponentDeclarationNode + { + $lexer->read(TokenType::KEYWORD_COMPONENT); + $start = $lexer->getStartPosition(); + $lexer->skipSpace(); - $this->skipReturnKeywordToken($tokens); + $name = $this->parseName($lexer); + $props = $this->parseProps($lexer); + $return = $this->parseReturn($lexer); - $return = $this->parseReturn($tokens); - $closingBracketToken = $this->extractClosingBracketToken($tokens); + $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $end = $lexer->getEndPosition(); return new ComponentDeclarationNode( - rangeInSource: Range::from( - $componentKeywordToken->boundaries->start, - $closingBracketToken->boundaries->end - ), + rangeInSource: Range::from($start, $end), name: $name, props: $props, return: $return ); } - /** - * @param \Iterator $tokens - * @return Token - */ - private function extractComponentKeywordToken(\Iterator &$tokens): Token + private function parseName(Lexer $lexer): ComponentNameNode { - Scanner::assertType($tokens, TokenType::KEYWORD_COMPONENT); - - $componentKeywordToken = $tokens->current(); + $lexer->read(TokenType::WORD); + $componentNameToken = $lexer->getTokenUnderCursor(); - Scanner::skipOne($tokens); - Scanner::skipSpace($tokens); - - return $componentKeywordToken; - } - - /** - * @param \Iterator $tokens - * @return ComponentNameNode - */ - private function 
parseName(\Iterator &$tokens): ComponentNameNode - { - Scanner::assertType($tokens, TokenType::STRING); - - $componentNameToken = $tokens->current(); - - Scanner::skipOne($tokens); - Scanner::skipSpace($tokens); + $lexer->skipSpace(); return new ComponentNameNode( - rangeInSource: $componentNameToken->boundaries, + rangeInSource: $componentNameToken->rangeInSource, value: ComponentName::from($componentNameToken->value) ); } - /** - * @param \Iterator $tokens - * @return void - */ - private function skipOpeningBracketToken(\Iterator &$tokens): void - { - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_OPEN); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); - } - - /** - * @param \Iterator $tokens - * @return PropertyDeclarationNodes - */ - private function parseProps(\Iterator &$tokens): PropertyDeclarationNodes + private function parseProps(Lexer $lexer): PropertyDeclarationNodes { $this->propertyDeclarationParser ??= PropertyDeclarationParser::singleton(); - $items = []; - while (Scanner::type($tokens) !== TokenType::KEYWORD_RETURN) { - assert($this->propertyDeclarationParser !== null); - $items[] = $this->propertyDeclarationParser->parse($tokens); + $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->skipSpaceAndComments(); - Scanner::skipSpaceAndComments($tokens); + $items = []; + while (!$lexer->peek(TokenType::KEYWORD_RETURN)) { + $lexer->expect(TokenType::WORD); + $items[] = $this->propertyDeclarationParser->parse($lexer); + $lexer->skipSpaceAndComments(); } return new PropertyDeclarationNodes(...$items); } - /** - * @param \Iterator $tokens - * @return void - */ - private function skipReturnKeywordToken(\Iterator &$tokens): void - { - Scanner::assertType($tokens, TokenType::KEYWORD_RETURN); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); - } - - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseReturn(\Iterator &$tokens): ExpressionNode + private function 
parseReturn(Lexer $lexer): ExpressionNode { - $this->returnParser ??= new ExpressionParser( - stopAt: TokenType::BRACKET_CURLY_CLOSE - ); - - return $this->returnParser->parse($tokens); - } - - /** - * @param \Iterator $tokens - * @return Token - */ - private function extractClosingBracketToken(\Iterator &$tokens): Token - { - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_CLOSE); - - $closingBracketToken = $tokens->current(); + $this->returnParser ??= new ExpressionParser(); - Scanner::skipOne($tokens); + $lexer->read(TokenType::KEYWORD_RETURN); + $lexer->readOneOf(self::$TOKEN_TYPES_SPACE); + $lexer->skipSpaceAndComments(); - return $closingBracketToken; + return $this->returnParser->parse($lexer); } } diff --git a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php index 2630c0a..2783779 100644 --- a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php +++ b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php @@ -33,224 +33,140 @@ use PackageFactory\ComponentEngine\Language\AST\Node\EnumDeclaration\EnumNameNode; use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class EnumDeclarationParser { use Singleton; + private static TokenTypes 
$TOKEN_TYPES_ENUM_MEMBER_VALUE_START; + private ?StringLiteralParser $stringLiteralParser = null; private ?IntegerLiteralParser $integerLiteralParser = null; - /** - * @param \Iterator $tokens - * @return EnumDeclarationNode - */ - public function parse(\Iterator &$tokens): EnumDeclarationNode + private function __construct() { - $enumKeyWordToken = $this->extractEnumKeywordToken($tokens); - $enumNameNode = $this->parseEnumName($tokens); - - $this->skipOpeningBracketToken($tokens); - - $enumMemberDeclarations = $this->parseEnumMemberDeclarations($tokens); - $closingBracketToken = $this->extractClosingBracketToken($tokens); - - return new EnumDeclarationNode( - rangeInSource: Range::from( - $enumKeyWordToken->boundaries->start, - $closingBracketToken->boundaries->end - ), - name: $enumNameNode, - members: $enumMemberDeclarations + self::$TOKEN_TYPES_ENUM_MEMBER_VALUE_START ??= TokenTypes::from( + TokenType::STRING_LITERAL_DELIMITER, + TokenType::INTEGER_BINARY, + TokenType::INTEGER_OCTAL, + TokenType::INTEGER_DECIMAL, + TokenType::INTEGER_HEXADECIMAL ); } - /** - * @param \Iterator $tokens - * @return Token - */ - private function extractEnumKeywordToken(\Iterator &$tokens): Token + public function parse(Lexer $lexer): EnumDeclarationNode { - Scanner::assertType($tokens, TokenType::KEYWORD_ENUM); + $lexer->read(TokenType::KEYWORD_ENUM); + $start = $lexer->getStartPosition(); + $lexer->skipSpace(); - $enumKeyWordToken = $tokens->current(); + $enumNameNode = $this->parseEnumName($lexer); + $enumMemberDeclarations = $this->parseEnumMemberDeclarations($lexer); - Scanner::skipOne($tokens); - Scanner::skipSpace($tokens); + $end = $lexer->getEndPosition(); - return $enumKeyWordToken; + return new EnumDeclarationNode( + rangeInSource: Range::from($start, $end), + name: $enumNameNode, + members: $enumMemberDeclarations + ); } - /** - * @param \Iterator $tokens - * @return EnumNameNode - */ - private function parseEnumName(\Iterator &$tokens): EnumNameNode + private function 
parseEnumName(Lexer $lexer): EnumNameNode { - Scanner::assertType($tokens, TokenType::STRING); + $lexer->read(TokenType::WORD); + $enumKeyNameToken = $lexer->getTokenUnderCursor(); + $lexer->skipSpace(); - $enumKeyNameToken = $tokens->current(); - $enumNameNode = new EnumNameNode( - rangeInSource: $enumKeyNameToken->boundaries, + return new EnumNameNode( + rangeInSource: $enumKeyNameToken->rangeInSource, value: EnumName::from($enumKeyNameToken->value) ); - - Scanner::skipOne($tokens); - Scanner::skipSpace($tokens); - - return $enumNameNode; } - /** - * @param \Iterator $tokens - * @return void - */ - private function skipOpeningBracketToken(\Iterator &$tokens): void + private function parseEnumMemberDeclarations(Lexer $lexer): EnumMemberDeclarationNodes { - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_OPEN); - Scanner::skipOne($tokens); - } + $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->skipSpaceAndComments(); - /** - * @param \Iterator $tokens - * @return EnumMemberDeclarationNodes - */ - private function parseEnumMemberDeclarations(\Iterator &$tokens): EnumMemberDeclarationNodes - { $items = []; - while (true) { - Scanner::skipSpaceAndComments($tokens); - - switch (Scanner::type($tokens)) { - case TokenType::STRING: - $items[] = $this->parseEnumMemberDeclaration($tokens); - break; - case TokenType::BRACKET_CURLY_CLOSE: - break 2; - default: - Scanner::assertType($tokens, TokenType::STRING, TokenType::BRACKET_CURLY_CLOSE); - } + while (!$lexer->peek(TokenType::BRACKET_CURLY_CLOSE)) { + $items[] = $this->parseEnumMemberDeclaration($lexer); } + $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + return new EnumMemberDeclarationNodes(...$items); } - /** - * @param \Iterator $tokens - * @return Token - */ - private function extractClosingBracketToken(\Iterator &$tokens): Token + private function parseEnumMemberDeclaration(Lexer $lexer): EnumMemberDeclarationNode { - Scanner::skipSpace($tokens); - Scanner::assertType($tokens, 
TokenType::BRACKET_CURLY_CLOSE); - - $closingBracketToken = $tokens->current(); - - Scanner::skipOne($tokens); + $name = $this->parseEnumMemberName($lexer); + $value = $this->parseEnumMemberValue($lexer); - return $closingBracketToken; - } - - /** - * @param \Iterator $tokens - * @return EnumMemberDeclarationNode - */ - private function parseEnumMemberDeclaration(\Iterator &$tokens): EnumMemberDeclarationNode - { - $enumMemberName = $this->parseEnumMemberName($tokens); - $value = $this->parseEnumMemberValue($tokens); + $lexer->skipSpaceAndComments(); return new EnumMemberDeclarationNode( rangeInSource: Range::from( - $enumMemberName->rangeInSource->start, + $name->rangeInSource->start, $value?->rangeInSource->end - ?? $enumMemberName->rangeInSource->end + ?? $name->rangeInSource->end ), - name: $enumMemberName, + name: $name, value: $value ); } - /** - * @param \Iterator $tokens - * @return EnumMemberNameNode - */ - private function parseEnumMemberName(\Iterator &$tokens): EnumMemberNameNode + private function parseEnumMemberName(Lexer $lexer): EnumMemberNameNode { - Scanner::assertType($tokens, TokenType::STRING); + $lexer->read(TokenType::WORD); + $enumMemberNameToken = $lexer->getTokenUnderCursor(); - $enumMemberNameToken = $tokens->current(); - $enumMemberNameNode = new EnumMemberNameNode( - rangeInSource: $enumMemberNameToken->boundaries, + return new EnumMemberNameNode( + rangeInSource: $enumMemberNameToken->rangeInSource, value: EnumMemberName::from($enumMemberNameToken->value) ); - - Scanner::skipOne($tokens); - - return $enumMemberNameNode; } - /** - * @param \Iterator $tokens - * @return null|EnumMemberValueNode - */ - private function parseEnumMemberValue(\Iterator &$tokens): ?EnumMemberValueNode + private function parseEnumMemberValue(Lexer $lexer): ?EnumMemberValueNode { - if (Scanner::type($tokens) !== TokenType::BRACKET_ROUND_OPEN) { - return null; + if ($lexer->probe(TokenType::BRACKET_ROUND_OPEN)) { + $start = $lexer->getStartPosition(); + + $value 
= match ($lexer->expectOneOf(self::$TOKEN_TYPES_ENUM_MEMBER_VALUE_START)) { + TokenType::STRING_LITERAL_DELIMITER => + $this->parseStringLiteral($lexer), + default => + $this->parseIntegerLiteral($lexer) + }; + + $lexer->read(TokenType::BRACKET_ROUND_CLOSE); + $end = $lexer->getEndPosition(); + + return new EnumMemberValueNode( + rangeInSource: Range::from($start, $end), + value: $value + ); } - $openingBracketToken = $tokens->current(); - Scanner::skipOne($tokens); - - $valueToken = $tokens->current(); - $value = match ($valueToken->type) { - TokenType::STRING_QUOTED => - $this->parseStringLiteral($tokens), - TokenType::NUMBER_BINARY, - TokenType::NUMBER_OCTAL, - TokenType::NUMBER_DECIMAL, - TokenType::NUMBER_HEXADECIMAL => - $this->parseIntegerLiteral($tokens), - default => throw new \Exception('@TODO: Unexpected Token ' . Scanner::type($tokens)->value) - }; - - Scanner::assertType($tokens, TokenType::BRACKET_ROUND_CLOSE); - $closingBracketToken = $tokens->current(); - Scanner::skipOne($tokens); - - return new EnumMemberValueNode( - rangeInSource: Range::from( - $openingBracketToken->boundaries->start, - $closingBracketToken->boundaries->end - ), - value: $value - ); + return null; } - /** - * @param \Iterator $tokens - * @return StringLiteralNode - */ - private function parseStringLiteral(\Iterator &$tokens): StringLiteralNode + private function parseStringLiteral(Lexer $lexer): StringLiteralNode { $this->stringLiteralParser ??= StringLiteralParser::singleton(); - return $this->stringLiteralParser->parse($tokens); + return $this->stringLiteralParser->parse($lexer); } - /** - * @param \Iterator $tokens - * @return IntegerLiteralNode - */ - private function parseIntegerLiteral(\Iterator &$tokens): IntegerLiteralNode + private function parseIntegerLiteral(Lexer $lexer): IntegerLiteralNode { $this->integerLiteralParser ??= IntegerLiteralParser::singleton(); - return $this->integerLiteralParser->parse($tokens); + return $this->integerLiteralParser->parse($lexer); } } 
diff --git a/src/Language/Parser/Export/ExportCouldNotBeParsed.php b/src/Language/Parser/Export/ExportCouldNotBeParsed.php index 53da8b6..c1671f2 100644 --- a/src/Language/Parser/Export/ExportCouldNotBeParsed.php +++ b/src/Language/Parser/Export/ExportCouldNotBeParsed.php @@ -23,24 +23,8 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Export; use PackageFactory\ComponentEngine\Language\Parser\ParserException; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; final class ExportCouldNotBeParsed extends ParserException { - public static function becauseOfUnexpectedToken( - TokenTypes $expectedTokenTypes, - Token $actualToken - ): self { - return new self( - code: 1691184282, - message: sprintf( - 'Export could not be parsed because of unexpected token %s. ' - . 'Expected %s instead.', - $actualToken->toDebugString(), - $expectedTokenTypes->toDebugString() - ), - affectedRangeInSource: $actualToken->boundaries - ); - } + protected const TITLE = 'Export could not be parsed'; } diff --git a/src/Language/Parser/Export/ExportParser.php b/src/Language/Parser/Export/ExportParser.php index c0fa9ed..ae14bf4 100644 --- a/src/Language/Parser/Export/ExportParser.php +++ b/src/Language/Parser/Export/ExportParser.php @@ -22,100 +22,81 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Export; +use LogicException; use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\ComponentDeclaration\ComponentDeclarationNode; use PackageFactory\ComponentEngine\Language\AST\Node\EnumDeclaration\EnumDeclarationNode; use PackageFactory\ComponentEngine\Language\AST\Node\Export\ExportNode; use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructDeclarationNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; +use 
PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\ComponentDeclaration\ComponentDeclarationParser; use PackageFactory\ComponentEngine\Language\Parser\EnumDeclaration\EnumDeclarationParser; use PackageFactory\ComponentEngine\Language\Parser\StructDeclaration\StructDeclarationParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; final class ExportParser { use Singleton; + private static TokenTypes $TOKEN_TYPES_DECLARATION_KEYWORDS; + private ?ComponentDeclarationParser $componentDeclarationParser = null; private ?EnumDeclarationParser $enumDeclarationParser = null; private ?StructDeclarationParser $structDeclarationParser = null; - /** - * @param \Iterator $tokens - * @return ExportNode - */ - public function parse(\Iterator &$tokens): ExportNode + private function __construct() { - $exportKeywordToken = $this->extractToken($tokens, TokenType::KEYWORD_EXPORT); - $declaration = match (Scanner::type($tokens)) { - TokenType::KEYWORD_COMPONENT => $this->parseComponentDeclaration($tokens), - TokenType::KEYWORD_ENUM => $this->parseEnumDeclaration($tokens), - TokenType::KEYWORD_STRUCT => $this->parseStructDeclaration($tokens), - default => throw ExportCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from( - TokenType::KEYWORD_COMPONENT, - TokenType::KEYWORD_ENUM, - TokenType::KEYWORD_STRUCT - ), - actualToken: $tokens->current() - ) - }; - - return new ExportNode( - rangeInSource: Range::from( - $exportKeywordToken->boundaries->start, - $declaration->rangeInSource->end - ), - declaration: $declaration + self::$TOKEN_TYPES_DECLARATION_KEYWORDS ??= 
TokenTypes::from( + TokenType::KEYWORD_COMPONENT, + TokenType::KEYWORD_ENUM, + TokenType::KEYWORD_STRUCT ); } - /** - * @param \Iterator $tokens - * @param TokenType $tokenType - * @return Token - */ - private function extractToken(\Iterator &$tokens, TokenType $tokenType): Token + public function parse(Lexer $lexer): ExportNode { - Scanner::assertType($tokens, $tokenType); - $token = $tokens->current(); - Scanner::skipOne($tokens); - Scanner::skipSpace($tokens); + try { + $lexer->read(TokenType::KEYWORD_EXPORT); + $start = $lexer->getStartPosition(); + + $lexer->skipSpace(); + + $declaration = match ($lexer->expectOneOf(self::$TOKEN_TYPES_DECLARATION_KEYWORDS)) { + TokenType::KEYWORD_COMPONENT => $this->parseComponentDeclaration($lexer), + TokenType::KEYWORD_ENUM => $this->parseEnumDeclaration($lexer), + TokenType::KEYWORD_STRUCT => $this->parseStructDeclaration($lexer), + default => throw new LogicException() + }; + + $end = $lexer->getEndPosition(); - return $token; + return new ExportNode( + rangeInSource: Range::from($start, $end), + declaration: $declaration + ); + } catch (LexerException $e) { + throw ExportCouldNotBeParsed::becauseOfLexerException($e); + } } - /** - * @param \Iterator $tokens - * @return ComponentDeclarationNode - */ - private function parseComponentDeclaration(\Iterator &$tokens): ComponentDeclarationNode + private function parseComponentDeclaration(Lexer $lexer): ComponentDeclarationNode { $this->componentDeclarationParser ??= ComponentDeclarationParser::singleton(); - return $this->componentDeclarationParser->parse($tokens); + return $this->componentDeclarationParser->parse($lexer); } - /** - * @param \Iterator $tokens - * @return EnumDeclarationNode - */ - private function parseEnumDeclaration(\Iterator &$tokens): EnumDeclarationNode + private function parseEnumDeclaration(Lexer $lexer): EnumDeclarationNode { $this->enumDeclarationParser ??= EnumDeclarationParser::singleton(); - return $this->enumDeclarationParser->parse($tokens); + 
return $this->enumDeclarationParser->parse($lexer); } - /** - * @param \Iterator $tokens - * @return StructDeclarationNode - */ - private function parseStructDeclaration(\Iterator &$tokens): StructDeclarationNode + private function parseStructDeclaration(Lexer $lexer): StructDeclarationNode { $this->structDeclarationParser ??= StructDeclarationParser::singleton(); - return $this->structDeclarationParser->parse($tokens); + return $this->structDeclarationParser->parse($lexer); } } diff --git a/src/Language/Parser/Expression/ExpressionCouldNotBeParsed.php b/src/Language/Parser/Expression/ExpressionCouldNotBeParsed.php index d3d4533..47cb22b 100644 --- a/src/Language/Parser/Expression/ExpressionCouldNotBeParsed.php +++ b/src/Language/Parser/Expression/ExpressionCouldNotBeParsed.php @@ -22,9 +22,11 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Expression; +use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\ParserException; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; +use PackageFactory\ComponentEngine\Language\Util\DebugHelper; final class ExpressionCouldNotBeParsed extends ParserException { @@ -37,10 +39,10 @@ public static function becauseOfUnexpectedToken( message: sprintf( 'Expression could not be parsed because of unexpected token %s. ' . 
'Expected %s instead.', - $actualToken->toDebugString(), - $expectedTokenTypes->toDebugString() + DebugHelper::describeToken($actualToken), + DebugHelper::describeTokenTypes($expectedTokenTypes) ), - affectedRangeInSource: $actualToken->boundaries + affectedRangeInSource: $actualToken->rangeInSource ); } } diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index 4bad737..ca741aa 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -22,6 +22,7 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Expression; +use LogicException; use PackageFactory\ComponentEngine\Domain\PropertyName\PropertyName; use PackageFactory\ComponentEngine\Language\AST\Node\Access\AccessKeyNode; use PackageFactory\ComponentEngine\Language\AST\Node\Access\AccessNode; @@ -32,6 +33,9 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TernaryOperation\TernaryOperationNode; use PackageFactory\ComponentEngine\Language\AST\Node\UnaryOperation\UnaryOperationNode; use PackageFactory\ComponentEngine\Language\AST\Node\UnaryOperation\UnaryOperator; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\BooleanLiteral\BooleanLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\Match\MatchParser; @@ -41,14 +45,14 @@ use PackageFactory\ComponentEngine\Language\Parser\TemplateLiteral\TemplateLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\ValueReference\ValueReferenceParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use 
PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; -use PhpParser\Parser\Tokens; final class ExpressionParser { + private static TokenTypes $TOKEN_TYPES_ACCESS; + private static TokenTypes $TOKEN_TYPES_BINARY_OPERATORS; + private static TokenTypes $TOKEN_TYPES_UNARY; + private static TokenTypes $TOKEN_TYPES_CLOSING_DELIMITERS; + private ?BooleanLiteralParser $booleanLiteralParser = null; private ?IntegerLiteralParser $integerLiteralParser = null; private ?MatchParser $matchParser = null; @@ -59,137 +63,133 @@ final class ExpressionParser private ?ValueReferenceParser $valueReferenceParser = null; public function __construct( - private ?TokenType $stopAt = null, private Precedence $precedence = Precedence::SEQUENCE ) { + self::$TOKEN_TYPES_ACCESS ??= TokenTypes::from( + TokenType::SYMBOL_PERIOD, + TokenType::SYMBOL_OPTCHAIN + ); + self::$TOKEN_TYPES_BINARY_OPERATORS ??= TokenTypes::from( + TokenType::SYMBOL_NULLISH_COALESCE, + TokenType::SYMBOL_BOOLEAN_AND, + TokenType::SYMBOL_BOOLEAN_OR, + TokenType::SYMBOL_STRICT_EQUALS, + TokenType::SYMBOL_NOT_EQUALS, + TokenType::SYMBOL_GREATER_THAN, + TokenType::SYMBOL_LESS_THAN + ); + self::$TOKEN_TYPES_UNARY ??= TokenTypes::from( + TokenType::SYMBOL_EXCLAMATIONMARK, + TokenType::KEYWORD_TRUE, + TokenType::KEYWORD_FALSE, + TokenType::KEYWORD_NULL, + TokenType::KEYWORD_MATCH, + TokenType::STRING_LITERAL_DELIMITER, + TokenType::INTEGER_HEXADECIMAL, + TokenType::INTEGER_DECIMAL, + TokenType::INTEGER_OCTAL, + TokenType::INTEGER_BINARY, + TokenType::WORD, + TokenType::BRACKET_ANGLE_OPEN, + TokenType::BRACKET_ROUND_OPEN + ); + self::$TOKEN_TYPES_CLOSING_DELIMITERS = TokenTypes::from( + TokenType::BRACKET_CURLY_OPEN, + TokenType::BRACKET_CURLY_CLOSE, + TokenType::BRACKET_ROUND_CLOSE, + TokenType::SYMBOL_COLON, + TokenType::SYMBOL_COMMA, + TokenType::SYMBOL_ARROW_SINGLE + ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - public function 
parse(\Iterator &$tokens): ExpressionNode + public function parse(Lexer $lexer): ExpressionNode { - Scanner::skipSpaceAndComments($tokens); + $result = $this->parseUnaryStatement($lexer); - $result = $this->parseUnaryStatement($tokens); + while (!$lexer->isEnd()) { + $lexer->skipSpaceAndComments(); - if ($this->shouldStop($tokens)) { - return $result; - } + if ($lexer->peekOneOf(self::$TOKEN_TYPES_CLOSING_DELIMITERS)) { + return $result; + } - $binaryOperationTokens = TokenTypes::from( - TokenType::OPERATOR_BOOLEAN_AND, - TokenType::OPERATOR_BOOLEAN_OR, - TokenType::COMPARATOR_EQUAL, - TokenType::COMPARATOR_NOT_EQUAL, - TokenType::COMPARATOR_GREATER_THAN, - TokenType::COMPARATOR_GREATER_THAN_OR_EQUAL, - TokenType::COMPARATOR_LESS_THAN, - TokenType::COMPARATOR_LESS_THAN_OR_EQUAL - ); + if ($lexer->probeOneOf(self::$TOKEN_TYPES_ACCESS)) { + $result = $this->parseAcccess($lexer, $result); + continue; + } - while ( - !$this->shouldStop($tokens) && - $binaryOperationTokens->contains(Scanner::type($tokens)) - ) { - $result = $this->parseBinaryOperation($tokens, $result); - } + if ($lexer->peek(TokenType::SYMBOL_QUESTIONMARK)) { + if ($this->precedence->mustStopAt(TokenType::SYMBOL_QUESTIONMARK)) { + return $result; + } + + $result = $this->parseTernaryOperation($lexer, $result); + continue; + } + + if ($tokenType = $lexer->peekOneOf(self::$TOKEN_TYPES_BINARY_OPERATORS)) { + if ($this->precedence->mustStopAt($tokenType)) { + return $result; + } + + $result = $this->parseBinaryOperation($lexer, $result); + continue; + } - if ($this->shouldStop($tokens)) { return $result; } - $result = match (Scanner::type($tokens)) { - TokenType::QUESTIONMARK => - $this->parseTernaryOperation($tokens, $result), - default => - throw ExpressionCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from(TokenType::QUESTIONMARK), - actualToken: $tokens->current() - ) - }; - return $result; } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private 
function parseUnaryStatement(\Iterator &$tokens): ExpressionNode + private function parseUnaryStatement(Lexer $lexer): ExpressionNode { - $result = match (Scanner::type($tokens)) { - TokenType::OPERATOR_BOOLEAN_NOT => - $this->parseUnaryOperation($tokens), - TokenType::KEYWORD_TRUE, - TokenType::KEYWORD_FALSE => - $this->parseBooleanLiteral($tokens), - TokenType::KEYWORD_NULL => - $this->parseNullLiteral($tokens), - TokenType::STRING_QUOTED => - $this->parseStringLiteral($tokens), - TokenType::NUMBER_BINARY, - TokenType::NUMBER_OCTAL, - TokenType::NUMBER_DECIMAL, - TokenType::NUMBER_HEXADECIMAL => - $this->parseIntegerLiteral($tokens), - TokenType::STRING => - $this->parseValueReference($tokens), - TokenType::TAG_START_OPENING => - $this->parseTag($tokens), - TokenType::TEMPLATE_LITERAL_START => - $this->parseTemplateLiteral($tokens), - TokenType::KEYWORD_MATCH => - $this->parseMatch($tokens), - TokenType::BRACKET_ROUND_OPEN => - $this->parseBracketedExpression($tokens), - default => - throw ExpressionCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from( - TokenType::KEYWORD_TRUE, - TokenType::KEYWORD_FALSE, - TokenType::KEYWORD_NULL, - TokenType::STRING_QUOTED, - TokenType::NUMBER_BINARY, - TokenType::NUMBER_OCTAL, - TokenType::NUMBER_DECIMAL, - TokenType::NUMBER_HEXADECIMAL, - TokenType::STRING, - TokenType::TAG_START_OPENING, - TokenType::TEMPLATE_LITERAL_START, - TokenType::KEYWORD_MATCH, - TokenType::BRACKET_ROUND_OPEN - ), - actualToken: $tokens->current() - ) - }; - - if (!Scanner::isEnd($tokens)) { - $result = match (Scanner::type($tokens)) { - TokenType::PERIOD, - TokenType::OPTCHAIN => $this->parseAcccess($tokens, $result), - default => $result + if ($lexer->peek(TokenType::TEMPLATE_LITERAL_DELIMITER)) { + $result = $this->parseTemplateLiteral($lexer); + } else { + $result = match ($lexer->expectOneOf(self::$TOKEN_TYPES_UNARY)) { + TokenType::SYMBOL_EXCLAMATIONMARK => + $this->parseUnaryOperation($lexer), + 
TokenType::KEYWORD_TRUE, + TokenType::KEYWORD_FALSE => + $this->parseBooleanLiteral($lexer), + TokenType::KEYWORD_NULL => + $this->parseNullLiteral($lexer), + TokenType::STRING_LITERAL_DELIMITER => + $this->parseStringLiteral($lexer), + TokenType::INTEGER_HEXADECIMAL, + TokenType::INTEGER_DECIMAL, + TokenType::INTEGER_OCTAL, + TokenType::INTEGER_BINARY => + $this->parseIntegerLiteral($lexer), + TokenType::WORD => + $this->parseValueReference($lexer), + TokenType::BRACKET_ANGLE_OPEN => + $this->parseTag($lexer), + TokenType::KEYWORD_MATCH => + $this->parseMatch($lexer), + TokenType::BRACKET_ROUND_OPEN => + $this->parseBracketedExpression($lexer), + default => throw new LogicException() }; } + $lexer->skipSpaceAndComments(); + return $result; } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseUnaryOperation(\Iterator &$tokens): ExpressionNode + private function parseUnaryOperation(Lexer $lexer): ExpressionNode { - $startingToken = $tokens->current(); + $start = $lexer->getStartPosition(); - $operator = $this->parseUnaryOperator($tokens); - $operand = $this->parseUnaryStatement($tokens); + $operator = $this->parseUnaryOperator($lexer); + $operand = $this->parseUnaryStatement($lexer); $unaryOperationNode = new UnaryOperationNode( rangeInSource: Range::from( - $startingToken->boundaries->start, + $start, $operand->rangeInSource->end ), operator: $operator, @@ -202,75 +202,29 @@ private function parseUnaryOperation(\Iterator &$tokens): ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return UnaryOperator - */ - private function parseUnaryOperator(\Iterator &$tokens): UnaryOperator + private function parseUnaryOperator(Lexer $lexer): UnaryOperator { - $unaryOperator = match (Scanner::type($tokens)) { - TokenType::OPERATOR_BOOLEAN_NOT => UnaryOperator::NOT, - default => throw ExpressionCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from(TokenType::OPERATOR_BOOLEAN_NOT), - actualToken: 
$tokens->current() - ) - }; + $lexer->read(TokenType::SYMBOL_EXCLAMATIONMARK); - Scanner::skipOne($tokens); + $unaryOperator = UnaryOperator::NOT; - return $unaryOperator; - } - - private function withStopAt(TokenType $stopAt): self - { - $newExpressionParser = clone $this; - $newExpressionParser->stopAt = $stopAt; + $lexer->skipSpaceAndComments(); - return $newExpressionParser; + return $unaryOperator; } private function withPrecedence(Precedence $precedence): self { - $newExpressionParser = clone $this; - $newExpressionParser->precedence = $precedence; - - return $newExpressionParser; - } - - /** - * @param \Iterator $tokens - * @return boolean - */ - private function shouldStop(\Iterator &$tokens): bool - { - Scanner::skipSpaceAndComments($tokens); - - if (Scanner::isEnd($tokens)) { - return true; - } - - $type = Scanner::type($tokens); - - if ($this->precedence->mustStopAt($type)) { - return true; - } - - if ($this->stopAt && $type === $this->stopAt) { - return true; - } - - return false; + return new self( + precedence: $precedence + ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseBooleanLiteral(\Iterator &$tokens): ExpressionNode + private function parseBooleanLiteral(Lexer $lexer): ExpressionNode { $this->booleanLiteralParser ??= BooleanLiteralParser::singleton(); - $booleanLiteralNode = $this->booleanLiteralParser->parse($tokens); + $booleanLiteralNode = $this->booleanLiteralParser->parse($lexer); return new ExpressionNode( rangeInSource: $booleanLiteralNode->rangeInSource, @@ -278,16 +232,11 @@ private function parseBooleanLiteral(\Iterator &$tokens): ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseNullLiteral(\Iterator &$tokens): ExpressionNode + private function parseNullLiteral(Lexer $lexer): ExpressionNode { - $this->nullLiteralParser ??= NullLiteralParser::singleton(); - $nullLiteralNode = $this->nullLiteralParser->parse($tokens); + 
$nullLiteralNode = $this->nullLiteralParser->parse($lexer); return new ExpressionNode( rangeInSource: $nullLiteralNode->rangeInSource, @@ -295,15 +244,11 @@ private function parseNullLiteral(\Iterator &$tokens): ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseStringLiteral(\Iterator &$tokens): ExpressionNode + private function parseStringLiteral(Lexer $lexer): ExpressionNode { $this->stringLiteralParser ??= StringLiteralParser::singleton(); - $stringLiteralNode = $this->stringLiteralParser->parse($tokens); + $stringLiteralNode = $this->stringLiteralParser->parse($lexer); return new ExpressionNode( rangeInSource: $stringLiteralNode->rangeInSource, @@ -311,15 +256,11 @@ private function parseStringLiteral(\Iterator &$tokens): ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseIntegerLiteral(\Iterator &$tokens): ExpressionNode + private function parseIntegerLiteral(Lexer $lexer): ExpressionNode { $this->integerLiteralParser ??= IntegerLiteralParser::singleton(); - $integerLiteralNode = $this->integerLiteralParser->parse($tokens); + $integerLiteralNode = $this->integerLiteralParser->parse($lexer); return new ExpressionNode( rangeInSource: $integerLiteralNode->rangeInSource, @@ -327,15 +268,11 @@ private function parseIntegerLiteral(\Iterator &$tokens): ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseValueReference(\Iterator &$tokens): ExpressionNode + private function parseValueReference(Lexer $lexer): ExpressionNode { $this->valueReferenceParser ??= ValueReferenceParser::singleton(); - $valueReferenceNode = $this->valueReferenceParser->parse($tokens); + $valueReferenceNode = $this->valueReferenceParser->parse($lexer); return new ExpressionNode( rangeInSource: $valueReferenceNode->rangeInSource, @@ -343,15 +280,11 @@ private function parseValueReference(\Iterator &$tokens): 
ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseTag(\Iterator &$tokens): ExpressionNode + private function parseTag(Lexer $lexer): ExpressionNode { $this->tagParser ??= TagParser::singleton(); - $tagNode = $this->tagParser->parse($tokens); + $tagNode = $this->tagParser->parse($lexer); return new ExpressionNode( rangeInSource: $tagNode->rangeInSource, @@ -359,15 +292,11 @@ private function parseTag(\Iterator &$tokens): ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseTemplateLiteral(\Iterator &$tokens): ExpressionNode + private function parseTemplateLiteral(Lexer $lexer): ExpressionNode { $this->templateLiteralParser ??= TemplateLiteralParser::singleton(); - $templateLiteralNode = $this->templateLiteralParser->parse($tokens); + $templateLiteralNode = $this->templateLiteralParser->parse($lexer); return new ExpressionNode( rangeInSource: $templateLiteralNode->rangeInSource, @@ -375,15 +304,11 @@ private function parseTemplateLiteral(\Iterator &$tokens): ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseMatch(\Iterator &$tokens): ExpressionNode + private function parseMatch(Lexer $lexer): ExpressionNode { $this->matchParser ??= MatchParser::singleton(); - $matchNode = $this->matchParser->parse($tokens); + $matchNode = $this->matchParser->parse($lexer); return new ExpressionNode( rangeInSource: $matchNode->rangeInSource, @@ -391,56 +316,35 @@ private function parseMatch(\Iterator &$tokens): ExpressionNode ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseBracketedExpression(\Iterator &$tokens): ExpressionNode + private function parseBracketedExpression(Lexer $lexer): ExpressionNode { - Scanner::assertType($tokens, TokenType::BRACKET_ROUND_OPEN); - - $openingBracketToken = $tokens->current(); - - Scanner::skipOne($tokens); - 
Scanner::skipSpaceAndComments($tokens); - - $innerExpressionNode = $this->withStopAt(TokenType::BRACKET_ROUND_CLOSE)->parse($tokens); + $lexer->read(TokenType::BRACKET_ROUND_OPEN); + $start = $lexer->getStartPosition(); + $lexer->skipSpaceAndComments(); - Scanner::assertType($tokens, TokenType::BRACKET_ROUND_CLOSE); + $innerExpressionNode = $this->parse($lexer); - $closingBracketToken = $tokens->current(); - - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); + $lexer->read(TokenType::BRACKET_ROUND_CLOSE); + $end = $lexer->getEndPosition(); + $lexer->skipSpaceAndComments(); return new ExpressionNode( - rangeInSource: Range::from( - $openingBracketToken->boundaries->start, - $closingBracketToken->boundaries->end - ), + rangeInSource: Range::from($start, $end), root: $innerExpressionNode->root ); } - /** - * @param \Iterator $tokens - * @param ExpressionNode $parent - * @return ExpressionNode - */ - private function parseAcccess(\Iterator &$tokens, ExpressionNode $parent): ExpressionNode + private function parseAcccess(Lexer $lexer, ExpressionNode $parent): ExpressionNode { - $accessTokenTypes = TokenTypes::from(TokenType::PERIOD, TokenType::OPTCHAIN); - - while (!Scanner::isEnd($tokens) && $accessTokenTypes->contains(Scanner::type($tokens))) { - $type = $this->parseAccessType($tokens); + while (!$lexer->isEnd()) { + $type = $this->parseAccessType($lexer); - Scanner::assertType($tokens, TokenType::STRING); - $keyToken = $tokens->current(); - Scanner::skipOne($tokens); + $lexer->read(TokenType::WORD); + $keyToken = $lexer->getTokenUnderCursor(); $rangeInSource = Range::from( $parent->rangeInSource->start, - $keyToken->boundaries->end + $keyToken->rangeInSource->end ); $parent = new ExpressionNode( @@ -450,47 +354,37 @@ private function parseAcccess(\Iterator &$tokens, ExpressionNode $parent): Expre parent: $parent, type: $type, key: new AccessKeyNode( - rangeInSource: $keyToken->boundaries, + rangeInSource: $keyToken->rangeInSource, value: 
PropertyName::from($keyToken->value) ) ) ); + + $lexer->skipSpaceAndComments(); + + if (!$lexer->probeOneOf(self::$TOKEN_TYPES_ACCESS)) { + break; + } } return $parent; } - /** - * @param \Iterator $tokens - * @return AccessType - */ - private function parseAccessType(\Iterator &$tokens): AccessType + private function parseAccessType(Lexer $lexer): AccessType { - $accessType = match (Scanner::type($tokens)) { - TokenType::PERIOD => AccessType::MANDATORY, - TokenType::OPTCHAIN => AccessType::OPTIONAL, - default => throw ExpressionCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from(TokenType::PERIOD, TokenType::OPTCHAIN), - actualToken: $tokens->current() - ) + return match ($lexer->getTokenTypeUnderCursor()) { + TokenType::SYMBOL_PERIOD => AccessType::MANDATORY, + TokenType::SYMBOL_OPTCHAIN => AccessType::OPTIONAL, + default => throw new LogicException() }; - - Scanner::skipOne($tokens); - - return $accessType; } - /** - * @param \Iterator $tokens - * @param ExpressionNode $leftOperand - * @return ExpressionNode - */ - private function parseBinaryOperation(\Iterator &$tokens, ExpressionNode $leftOperand): ExpressionNode + private function parseBinaryOperation(Lexer $lexer, ExpressionNode $leftOperand): ExpressionNode { - $operator = $this->parseBinaryOperator($tokens); + $operator = $this->parseBinaryOperator($lexer); $rightOperand = $this ->withPrecedence(Precedence::forBinaryOperator($operator)) - ->parse($tokens); + ->parse($lexer); $rangeInSource = Range::from( $leftOperand->rangeInSource->start, $rightOperand->rangeInSource->end @@ -507,60 +401,46 @@ private function parseBinaryOperation(\Iterator &$tokens, ExpressionNode $leftOp ); } - /** - * @param \Iterator $tokens - * @return BinaryOperator - */ - private function parseBinaryOperator(\Iterator &$tokens): BinaryOperator + private function parseBinaryOperator(Lexer $lexer): BinaryOperator { - $operator = match (Scanner::type($tokens)) { - TokenType::OPERATOR_BOOLEAN_AND => 
BinaryOperator::AND, - TokenType::OPERATOR_BOOLEAN_OR => BinaryOperator::OR, - TokenType::COMPARATOR_EQUAL => BinaryOperator::EQUAL, - TokenType::COMPARATOR_NOT_EQUAL => BinaryOperator::NOT_EQUAL, - TokenType::COMPARATOR_GREATER_THAN => BinaryOperator::GREATER_THAN, - TokenType::COMPARATOR_GREATER_THAN_OR_EQUAL => BinaryOperator::GREATER_THAN_OR_EQUAL, - TokenType::COMPARATOR_LESS_THAN => BinaryOperator::LESS_THAN, - TokenType::COMPARATOR_LESS_THAN_OR_EQUAL => BinaryOperator::LESS_THAN_OR_EQUAL, - default => throw ExpressionCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from( - TokenType::OPERATOR_BOOLEAN_AND, - TokenType::OPERATOR_BOOLEAN_OR, - TokenType::COMPARATOR_EQUAL, - TokenType::COMPARATOR_NOT_EQUAL, - TokenType::COMPARATOR_GREATER_THAN, - TokenType::COMPARATOR_GREATER_THAN_OR_EQUAL, - TokenType::COMPARATOR_LESS_THAN, - TokenType::COMPARATOR_LESS_THAN_OR_EQUAL - ), - actualToken: $tokens->current() - ) + if ($lexer->probe(TokenType::SYMBOL_GREATER_THAN_OR_EQUAL)) { + $lexer->skipSpaceAndComments(); + return BinaryOperator::GREATER_THAN_OR_EQUAL; + } + + if ($lexer->probe(TokenType::SYMBOL_LESS_THAN_OR_EQUAL)) { + $lexer->skipSpaceAndComments(); + return BinaryOperator::LESS_THAN_OR_EQUAL; + } + + $lexer->readOneOf(self::$TOKEN_TYPES_BINARY_OPERATORS); + $operator = match ($lexer->getTokenTypeUnderCursor()) { + TokenType::SYMBOL_NULLISH_COALESCE => BinaryOperator::NULLISH_COALESCE, + TokenType::SYMBOL_BOOLEAN_AND => BinaryOperator::AND, + TokenType::SYMBOL_BOOLEAN_OR => BinaryOperator::OR, + TokenType::SYMBOL_STRICT_EQUALS => BinaryOperator::EQUAL, + TokenType::SYMBOL_NOT_EQUALS => BinaryOperator::NOT_EQUAL, + TokenType::SYMBOL_GREATER_THAN => BinaryOperator::GREATER_THAN, + TokenType::SYMBOL_LESS_THAN => BinaryOperator::LESS_THAN, + default => throw new LogicException() }; - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); + $lexer->skipSpaceAndComments(); return $operator; } - /** - * @param \Iterator 
$tokens - * @param ExpressionNode $condition - * @return ExpressionNode - */ - private function parseTernaryOperation(\Iterator &$tokens, ExpressionNode $condition): ExpressionNode + private function parseTernaryOperation(Lexer $lexer, ExpressionNode $condition): ExpressionNode { - Scanner::assertType($tokens, TokenType::QUESTIONMARK); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); + $lexer->read(TokenType::SYMBOL_QUESTIONMARK); + $lexer->skipSpaceAndComments(); - $trueBranch = $this->withStopAt(TokenType::COLON)->parse($tokens); + $trueBranch = $this->parse($lexer); - Scanner::assertType($tokens, TokenType::COLON); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); + $lexer->read(TokenType::SYMBOL_COLON); + $lexer->skipSpaceAndComments(); - $falseBranch = $this->parse($tokens); + $falseBranch = $this->parse($lexer); $root = new TernaryOperationNode( condition: $condition, diff --git a/src/Language/Parser/Expression/Precedence.php b/src/Language/Parser/Expression/Precedence.php index a96d155..06c0b17 100644 --- a/src/Language/Parser/Expression/Precedence.php +++ b/src/Language/Parser/Expression/Precedence.php @@ -23,7 +23,7 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Expression; use PackageFactory\ComponentEngine\Language\AST\Node\BinaryOperation\BinaryOperator; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; enum Precedence: int { @@ -47,25 +47,26 @@ public static function forTokenType(TokenType $tokenType): self TokenType::BRACKET_ROUND_CLOSE, TokenType::BRACKET_SQUARE_OPEN, TokenType::BRACKET_SQUARE_CLOSE, - TokenType::OPTCHAIN, - TokenType::PERIOD => self::ACCESS, + TokenType::SYMBOL_OPTCHAIN, + TokenType::SYMBOL_PERIOD => self::ACCESS, - TokenType::OPERATOR_BOOLEAN_NOT => self::UNARY, + TokenType::SYMBOL_EXCLAMATIONMARK => self::UNARY, - TokenType::COMPARATOR_GREATER_THAN, - TokenType::COMPARATOR_GREATER_THAN_OR_EQUAL, 
- TokenType::COMPARATOR_LESS_THAN, - TokenType::COMPARATOR_LESS_THAN_OR_EQUAL => self::COMPARISON, + TokenType::SYMBOL_GREATER_THAN, + TokenType::SYMBOL_GREATER_THAN_OR_EQUAL, + TokenType::SYMBOL_LESS_THAN, + TokenType::SYMBOL_LESS_THAN_OR_EQUAL => self::COMPARISON, - TokenType::COMPARATOR_EQUAL, - TokenType::COMPARATOR_NOT_EQUAL => self::EQUALITY, + TokenType::SYMBOL_STRICT_EQUALS, + TokenType::SYMBOL_NOT_EQUALS => self::EQUALITY, - TokenType::OPERATOR_BOOLEAN_AND => self::LOGICAL_AND, + TokenType::SYMBOL_BOOLEAN_AND => self::LOGICAL_AND, - TokenType::OPERATOR_BOOLEAN_OR => self::LOGICAL_OR, + TokenType::SYMBOL_NULLISH_COALESCE, + TokenType::SYMBOL_BOOLEAN_OR => self::LOGICAL_OR, - TokenType::QUESTIONMARK, - TokenType::COLON => self::TERNARY, + TokenType::SYMBOL_QUESTIONMARK, + TokenType::SYMBOL_COLON => self::TERNARY, default => self::SEQUENCE }; @@ -75,6 +76,8 @@ public static function forBinaryOperator(BinaryOperator $binaryOperator): self { return match ($binaryOperator) { BinaryOperator::AND => self::LOGICAL_AND, + + BinaryOperator::NULLISH_COALESCE, BinaryOperator::OR => self::LOGICAL_OR, BinaryOperator::EQUAL, diff --git a/src/Language/Parser/Import/ImportCouldNotBeParsed.php b/src/Language/Parser/Import/ImportCouldNotBeParsed.php index 38dd103..d2eee6a 100644 --- a/src/Language/Parser/Import/ImportCouldNotBeParsed.php +++ b/src/Language/Parser/Import/ImportCouldNotBeParsed.php @@ -28,16 +28,15 @@ final class ImportCouldNotBeParsed extends ParserException { + protected const TITLE = 'Import could not be parsed'; + public static function becauseOfInvalidImportedNameNodes( InvalidImportedNameNodes $cause, Range $affectedRangeInSource ): self { return new self( code: 1691181627, - message: sprintf( - 'Import could not be parsed, because of invalid imported names: %s', - $cause->getMessage() - ), + message: $cause->getMessage(), affectedRangeInSource: $cause->affectedRangeInSource ?? 
$affectedRangeInSource ); } diff --git a/src/Language/Parser/Import/ImportParser.php b/src/Language/Parser/Import/ImportParser.php index b10a2cc..92381b6 100644 --- a/src/Language/Parser/Import/ImportParser.php +++ b/src/Language/Parser/Import/ImportParser.php @@ -29,120 +29,103 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Import\ImportNode; use PackageFactory\ComponentEngine\Language\AST\Node\Import\InvalidImportedNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; +use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class ImportParser { use Singleton; + private static TokenTypes $TOKEN_TYPES_NAME_BOUNDARIES; + private ?StringLiteralParser $pathParser = null; - /** - * @param \Iterator $tokens - * @return ImportNode - */ - public function parse(\Iterator &$tokens): ImportNode + private function __construct() { - $fromKeywordToken = $this->extractToken($tokens, TokenType::KEYWORD_FROM); - $path = $this->parsePath($tokens); - - $this->skipToken($tokens, TokenType::KEYWORD_IMPORT); - $openingBracketToken = $this->extractToken($tokens, TokenType::BRACKET_CURLY_OPEN); + self::$TOKEN_TYPES_NAME_BOUNDARIES ??= TokenTypes::from( + TokenType::WORD, + TokenType::SYMBOL_COMMA, + TokenType::BRACKET_CURLY_CLOSE + ); + } + public function parse(Lexer $lexer): ImportNode + { try { - $names = $this->parseNames($tokens); - 
$closingBracketToken = $this->extractToken($tokens, TokenType::BRACKET_CURLY_CLOSE); + $lexer->read(TokenType::KEYWORD_FROM); + $start = $lexer->getStartPosition(); + $lexer->skipSpace(); + + $path = $this->parsePath($lexer); + + $lexer->read(TokenType::KEYWORD_IMPORT); + $lexer->skipSpace(); + + $names = $this->parseNames($lexer); + $end = $lexer->getEndPosition(); return new ImportNode( - rangeInSource: Range::from( - $fromKeywordToken->boundaries->start, - $closingBracketToken->boundaries->end - ), + rangeInSource: Range::from($start, $end), path: $path, names: $names ); - } catch (InvalidImportedNameNodes $e) { - throw ImportCouldNotBeParsed::becauseOfInvalidImportedNameNodes( - cause: $e, - affectedRangeInSource: $openingBracketToken->boundaries - ); + } catch (LexerException $e) { + throw ImportCouldNotBeParsed::becauseOfLexerException($e); } } - /** - * @param \Iterator $tokens - * @param TokenType $tokenType - * @return Token - */ - private function extractToken(\Iterator &$tokens, TokenType $tokenType): Token - { - Scanner::assertType($tokens, $tokenType); - $token = $tokens->current(); - Scanner::skipOne($tokens); - Scanner::skipSpace($tokens); - - return $token; - } - - /** - * @param \Iterator $tokens - * @param TokenType $tokenType - * @return void - */ - private function skipToken(\Iterator &$tokens, TokenType $tokenType): void - { - Scanner::assertType($tokens, $tokenType); - Scanner::skipOne($tokens); - Scanner::skipSpace($tokens); - } - - /** - * @param \Iterator $tokens - * @return StringLiteralNode - */ - private function parsePath(\Iterator &$tokens): StringLiteralNode + private function parsePath(Lexer $lexer): StringLiteralNode { $this->pathParser ??= StringLiteralParser::singleton(); - $path = $this->pathParser->parse($tokens); - Scanner::skipSpace($tokens); + $path = $this->pathParser->parse($lexer); + $lexer->skipSpace(); return $path; } - /** - * @param \Iterator $tokens - * @return ImportedNameNodes - */ - private function 
parseNames(\Iterator &$tokens): ImportedNameNodes + private function parseNames(Lexer $lexer): ImportedNameNodes { - $items = []; - while (Scanner::type($tokens) !== TokenType::BRACKET_CURLY_CLOSE) { - $items[] = $this->parseName($tokens); - - if (Scanner::type($tokens) !== TokenType::BRACKET_CURLY_CLOSE) { - $this->skipToken($tokens, TokenType::COMMA); + $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $start = $lexer->getStartPosition(); + $lexer->skipSpaceAndComments(); + + $nameTokens = []; + while (!$lexer->peek(TokenType::BRACKET_CURLY_CLOSE)) { + $lexer->read(TokenType::WORD); + $nameTokens[] = $lexer->getTokenUnderCursor(); + + $lexer->skipSpaceAndComments(); + if ($lexer->probe(TokenType::SYMBOL_COMMA)) { + $lexer->skipSpaceAndComments(); + } else { + break; } } - return new ImportedNameNodes(...$items); - } - - /** - * @param \Iterator $tokens - * @return ImportedNameNode - */ - private function parseName(\Iterator &$tokens): ImportedNameNode - { - $nameToken = $this->extractToken($tokens, TokenType::STRING); + $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $end = $lexer->getEndPosition(); - return new ImportedNameNode( - rangeInSource: $nameToken->boundaries, - value: VariableName::from($nameToken->value) - ); + try { + return new ImportedNameNodes( + ...array_map( + static fn (Token $nameToken) => new ImportedNameNode( + rangeInSource: $nameToken->rangeInSource, + value: VariableName::from($nameToken->value) + ), + $nameTokens + ) + ); + } catch (InvalidImportedNameNodes $e) { + throw ImportCouldNotBeParsed::becauseOfInvalidImportedNameNodes( + cause: $e, + affectedRangeInSource: $e->affectedRangeInSource ?? 
Range::from($start, $end) + ); + } } } diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralCouldNotBeParsed.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralCouldNotBeParsed.php index b1d5dd3..4c37470 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralCouldNotBeParsed.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralCouldNotBeParsed.php @@ -23,32 +23,8 @@ namespace PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral; use PackageFactory\ComponentEngine\Language\Parser\ParserException; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; final class IntegerLiteralCouldNotBeParsed extends ParserException { - public static function becauseOfUnexpectedEndOfFile(): self - { - return new self( - code: 1691238474, - message: 'Integer literal could not be parsed because of unexpected end of file.' - ); - } - - public static function becauseOfUnexpectedToken( - TokenTypes $expectedTokenTypes, - Token $actualToken - ): self { - return new self( - code: 1691238491, - message: sprintf( - 'Integer literal could not be parsed because of unexpected token %s. ' - . 
'Expected %s instead.', - $actualToken->toDebugString(), - $expectedTokenTypes->toDebugString() - ), - affectedRangeInSource: $actualToken->boundaries - ); - } + protected const TITLE = 'Integer literal could not be parsed'; } diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index 4f34d1b..3c6f4f9 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -22,55 +22,62 @@ namespace PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral; +use LogicException; use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerFormat; use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; +use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Util\DebugHelper; final class IntegerLiteralParser { use Singleton; - /** - * @param \Iterator $tokens - * @return IntegerLiteralNode - */ - public function parse(\Iterator &$tokens): IntegerLiteralNode - { - if (Scanner::isEnd($tokens)) { - throw IntegerLiteralCouldNotBeParsed::becauseOfUnexpectedEndOfFile(); - } + private static TokenTypes $INTEGER_TOKEN_TYPES; - $token = $tokens->current(); + private function __construct() + { + self::$INTEGER_TOKEN_TYPES ??= TokenTypes::from( + 
TokenType::INTEGER_HEXADECIMAL, + TokenType::INTEGER_DECIMAL, + TokenType::INTEGER_OCTAL, + TokenType::INTEGER_BINARY + ); + } - Scanner::skipOne($tokens); + public function parse(Lexer $lexer): IntegerLiteralNode + { + try { + $lexer->readOneOf(self::$INTEGER_TOKEN_TYPES); + $token = $lexer->getTokenUnderCursor(); - return new IntegerLiteralNode( - rangeInSource: $token->boundaries, - format: $this->getIntegerFormatFromToken($token), - value: $token->value - ); + return new IntegerLiteralNode( + rangeInSource: $token->rangeInSource, + format: $this->getIntegerFormatFromToken($token), + value: $token->value + ); + } catch (LexerException $e) { + throw IntegerLiteralCouldNotBeParsed::becauseOfLexerException($e); + } } private function getIntegerFormatFromToken(Token $token): IntegerFormat { return match ($token->type) { - TokenType::NUMBER_BINARY => IntegerFormat::BINARY, - TokenType::NUMBER_OCTAL => IntegerFormat::OCTAL, - TokenType::NUMBER_DECIMAL => IntegerFormat::DECIMAL, - TokenType::NUMBER_HEXADECIMAL => IntegerFormat::HEXADECIMAL, - - default => throw IntegerLiteralCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from( - TokenType::NUMBER_BINARY, - TokenType::NUMBER_OCTAL, - TokenType::NUMBER_DECIMAL, - TokenType::NUMBER_HEXADECIMAL - ), - actualToken: $token + TokenType::INTEGER_BINARY => IntegerFormat::BINARY, + TokenType::INTEGER_OCTAL => IntegerFormat::OCTAL, + TokenType::INTEGER_DECIMAL => IntegerFormat::DECIMAL, + TokenType::INTEGER_HEXADECIMAL => IntegerFormat::HEXADECIMAL, + default => throw new LogicException( + sprintf( + 'Expected %s to be one of %s', + $token->type->value, + DebugHelper::describeTokenTypes($this->INTEGER_TOKEN_TYPES) + ) ) }; } diff --git a/src/Language/Parser/Match/MatchParser.php b/src/Language/Parser/Match/MatchParser.php index dab6678..d9a2a84 100644 --- a/src/Language/Parser/Match/MatchParser.php +++ b/src/Language/Parser/Match/MatchParser.php @@ -29,11 +29,10 @@ use 
PackageFactory\ComponentEngine\Language\AST\Node\Match\MatchArmNode; use PackageFactory\ComponentEngine\Language\AST\Node\Match\MatchArmNodes; use PackageFactory\ComponentEngine\Language\AST\Node\Match\MatchNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class MatchParser { @@ -43,196 +42,100 @@ final class MatchParser private ?ExpressionParser $matchArmLeftParser = null; private ?ExpressionParser $matchArmRightParser = null; - /** - * @param \Iterator $tokens - * @return MatchNode - */ - public function parse(\Iterator &$tokens): MatchNode + public function parse(Lexer $lexer): MatchNode { - $matchKeywordToken = $this->extractMatchKeywordToken($tokens); - $subject = $this->parseSubject($tokens); - - $this->skipOpeningBracketToken($tokens); - - try { - $arms = $this->parseArms($tokens); - - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_CLOSE); - $closingBracketToken = $tokens->current(); - Scanner::skipOne($tokens); - - return new MatchNode( - rangeInSource: Range::from( - $matchKeywordToken->boundaries->start, - $closingBracketToken->boundaries->end - ), - subject: $subject, - arms: $arms - ); - } catch (InvalidMatchArmNodes $e) { - throw MatchCouldNotBeParsed::becauseOfInvalidMatchArmNodes( - cause: $e, - affectedRangeInSource: $matchKeywordToken->boundaries - ); - } - } - - /** - * @param \Iterator $tokens - * @return Token - */ - private function extractMatchKeywordToken(\Iterator &$tokens): Token - { - Scanner::assertType($tokens, TokenType::KEYWORD_MATCH); - - $matchKeywordToken = $tokens->current(); - - Scanner::skipOne($tokens); - 
Scanner::skipSpace($tokens); - - return $matchKeywordToken; + $lexer->read(TokenType::KEYWORD_MATCH); + $start = $lexer->getStartPosition(); + $lexer->skipSpace(); + + $subject = $this->parseSubject($lexer); + $arms = $this->parseArms($lexer); + $end = $lexer->getEndPosition(); + + return new MatchNode( + rangeInSource: Range::from($start, $end), + subject: $subject, + arms: $arms + ); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseSubject(\Iterator &$tokens): ExpressionNode + private function parseSubject(Lexer $lexer): ExpressionNode { - $this->subjectParser ??= new ExpressionParser( - stopAt: TokenType::BRACKET_CURLY_OPEN - ); + $this->subjectParser ??= new ExpressionParser(); - return $this->subjectParser->parse($tokens); + return $this->subjectParser->parse($lexer); } - /** - * @param \Iterator $tokens - * @return void - */ - private function skipOpeningBracketToken(\Iterator &$tokens): void + private function parseArms(Lexer $lexer): MatchArmNodes { - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_OPEN); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); - } + $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $start = $lexer->getStartPosition(); - /** - * @param \Iterator $tokens - * @return MatchArmNodes - */ - private function parseArms(\Iterator &$tokens): MatchArmNodes - { $items = []; - while (Scanner::type($tokens) !== TokenType::BRACKET_CURLY_CLOSE) { - $items[] = $this->parseArm($tokens); + while (!$lexer->peek(TokenType::BRACKET_CURLY_CLOSE)) { + $lexer->skipSpaceAndComments(); + $items[] = $this->parseArm($lexer); } - return new MatchArmNodes(...$items); + + $lexer->skipSpaceAndComments(); + $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $end = $lexer->getEndPosition(); + + try { + return new MatchArmNodes(...$items); + } catch (InvalidMatchArmNodes $e) { + throw MatchCouldNotBeParsed::becauseOfInvalidMatchArmNodes( + cause: $e, + affectedRangeInSource: $e->affectedRangeInSource 
?? Range::from($start, $end) + ); + } } - /** - * @param \Iterator $tokens - * @return MatchArmNode - */ - private function parseArm(\Iterator &$tokens): MatchArmNode + private function parseArm(Lexer $lexer): MatchArmNode { - $defaultKeywordToken = $this->extractDefaultKeywordToken($tokens); - $left = is_null($defaultKeywordToken) ? $this->parseArmLeft($tokens) : null; + $left = $this->parseArmLeft($lexer); + $start = $left?->items[0]?->rangeInSource->start ?? + $lexer->getStartPosition(); - $this->skipArrowSingleToken($tokens); + $lexer->skipSpaceAndComments(); + $lexer->read(TokenType::SYMBOL_ARROW_SINGLE); + $lexer->skipSpaceAndComments(); - $right = $this->parseArmRight($tokens); - - if (is_null($defaultKeywordToken)) { - assert($left !== null); - $start = $left->items[0]->rangeInSource->start; - } else { - $start = $defaultKeywordToken->boundaries->start; - } + $right = $this->parseArmRight($lexer); + $lexer->skipSpaceAndComments(); return new MatchArmNode( - rangeInSource: Range::from( - $start, - $right->rangeInSource->end - ), + rangeInSource: Range::from($start, $right->rangeInSource->end), left: $left, right: $right ); } - /** - * @param \Iterator $tokens - * @return null|Token - */ - private function extractDefaultKeywordToken(\Iterator &$tokens): ?Token + private function parseArmLeft(Lexer $lexer): ?ExpressionNodes { - if (Scanner::type($tokens) === TokenType::KEYWORD_DEFAULT) { - $defaultKeywordToken = $tokens->current(); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); - - return $defaultKeywordToken; + if ($lexer->probe(TokenType::KEYWORD_DEFAULT)) { + return null; } - return null; - } - - /** - * @param \Iterator $tokens - * @return ExpressionNodes - */ - private function parseArmLeft(\Iterator &$tokens): ExpressionNodes - { - $this->matchArmLeftParser ??= new ExpressionParser( - stopAt: TokenType::ARROW_SINGLE - ); + $this->matchArmLeftParser ??= new ExpressionParser(); $items = []; - while (Scanner::type($tokens) !== 
TokenType::ARROW_SINGLE) { - assert($this->matchArmLeftParser !== null); - $items[] = $this->matchArmLeftParser->parse($tokens); + do { + $lexer->skipSpaceAndComments(); + $items[] = $this->matchArmLeftParser->parse($lexer); + $lexer->skipSpaceAndComments(); + } while ($lexer->probe(TokenType::SYMBOL_COMMA)); - if (Scanner::type($tokens) !== TokenType::ARROW_SINGLE) { - $this->skipCommaToken($tokens); - } - } + $lexer->skipSpaceAndComments(); return new ExpressionNodes(...$items); } - /** - * @param \Iterator $tokens - * @return void - */ - private function skipCommaToken(\Iterator &$tokens): void - { - Scanner::assertType($tokens, TokenType::COMMA); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); - } - - /** - * @param \Iterator $tokens - * @return void - */ - private function skipArrowSingleToken(\Iterator &$tokens): void + private function parseArmRight(Lexer $lexer): ExpressionNode { - Scanner::assertType($tokens, TokenType::ARROW_SINGLE); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); - } - - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseArmRight(\Iterator &$tokens): ExpressionNode - { - $this->matchArmRightParser ??= new ExpressionParser( - stopAt: TokenType::BRACKET_CURLY_CLOSE - ); + $this->matchArmRightParser ??= new ExpressionParser(); - return $this->matchArmRightParser->parse($tokens); + return $this->matchArmRightParser->parse($lexer); } } diff --git a/src/Language/Parser/Module/ModuleCouldNotBeParsed.php b/src/Language/Parser/Module/ModuleCouldNotBeParsed.php index fac7186..4817cb5 100644 --- a/src/Language/Parser/Module/ModuleCouldNotBeParsed.php +++ b/src/Language/Parser/Module/ModuleCouldNotBeParsed.php @@ -27,16 +27,5 @@ final class ModuleCouldNotBeParsed extends ParserException { - public static function becauseOfUnexpectedExceedingToken( - Token $exceedingToken - ): self { - return new self( - code: 1691235933, - message: sprintf( - 'Module could not be 
parsed because of unexpected exceeding token %s.', - $exceedingToken->toDebugString() - ), - affectedRangeInSource: $exceedingToken->boundaries - ); - } + protected const TITLE = 'Module could not be parsed'; } diff --git a/src/Language/Parser/Module/ModuleParser.php b/src/Language/Parser/Module/ModuleParser.php index d45f64c..eb6da24 100644 --- a/src/Language/Parser/Module/ModuleParser.php +++ b/src/Language/Parser/Module/ModuleParser.php @@ -27,13 +27,13 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Import\ImportNode; use PackageFactory\ComponentEngine\Language\AST\Node\Import\ImportNodes; use PackageFactory\ComponentEngine\Language\AST\Node\Module\ModuleNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; use PackageFactory\ComponentEngine\Language\Parser\Export\ExportParser; use PackageFactory\ComponentEngine\Language\Parser\Import\ImportParser; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class ModuleParser { @@ -42,71 +42,56 @@ final class ModuleParser private ?ImportParser $importParser = null; private ?ExportParser $exportParser = null; - /** - * @param \Iterator $tokens - * @return ModuleNode - */ - public function parse(\Iterator &$tokens): ModuleNode + public function parse(Lexer $lexer): ModuleNode { - Scanner::skipSpaceAndComments($tokens); - - $imports = $this->parseImports($tokens); - $export = $this->parseExport($tokens); - - if (!Scanner::isEnd($tokens)) { - throw ModuleCouldNotBeParsed::becauseOfUnexpectedExceedingToken( - exceedingToken: $tokens->current() + try { + $lexer->skipSpaceAndComments(); + + $imports = 
$this->parseImports($lexer); + $export = $this->parseExport($lexer); + + $lexer->skipSpaceAndComments(); + $lexer->assertIsEnd(); + + return new ModuleNode( + rangeInSource: Range::from( + new Position(0, 0), + $export->rangeInSource->end + ), + imports: $imports, + export: $export ); + } catch (LexerException $e) { + throw ModuleCouldNotBeParsed::becauseOfLexerException($e); } - - return new ModuleNode( - rangeInSource: Range::from( - new Position(0, 0), - $export->rangeInSource->end - ), - imports: $imports, - export: $export - ); } - /** - * @param \Iterator $tokens - * @return ImportNodes - */ - private function parseImports(\Iterator &$tokens): ImportNodes + private function parseImports(Lexer $lexer): ImportNodes { $items = []; - while (Scanner::type($tokens) !== TokenType::KEYWORD_EXPORT) { - $items[] = $this->parseImport($tokens); + while ($lexer->peek(TokenType::KEYWORD_FROM)) { + $items[] = $this->parseImport($lexer); } return new ImportNodes(...$items); } - /** - * @param \Iterator $tokens - * @return ImportNode - */ - private function parseImport(\Iterator &$tokens): ImportNode + private function parseImport(Lexer $lexer): ImportNode { $this->importParser ??= ImportParser::singleton(); - $import = $this->importParser->parse($tokens); - Scanner::skipSpaceAndComments($tokens); + $import = $this->importParser->parse($lexer); + $lexer->skipSpaceAndComments(); return $import; } - /** - * @param \Iterator $tokens - * @return ExportNode - */ - private function parseExport(\Iterator &$tokens): ExportNode + private function parseExport(Lexer $lexer): ExportNode { $this->exportParser ??= ExportParser::singleton(); - $export = $this->exportParser->parse($tokens); - Scanner::skipSpaceAndComments($tokens); + $export = $this->exportParser->parse($lexer); + $lexer->skipSpaceAndComments(); return $export; } diff --git a/src/Language/Parser/NullLiteral/NullLiteralParser.php b/src/Language/Parser/NullLiteral/NullLiteralParser.php index 69f17b2..8bb526a 100644 --- 
a/src/Language/Parser/NullLiteral/NullLiteralParser.php +++ b/src/Language/Parser/NullLiteral/NullLiteralParser.php @@ -24,28 +24,20 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\NullLiteral\NullLiteralNode; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; final class NullLiteralParser { use Singleton; - /** - * @param \Iterator $tokens - * @return NullLiteralNode - */ - public function parse(\Iterator &$tokens): NullLiteralNode + public function parse(Lexer $lexer): NullLiteralNode { - Scanner::assertType($tokens, TokenType::KEYWORD_NULL); - - $token = $tokens->current(); - - Scanner::skipOne($tokens); + $lexer->read(TokenType::KEYWORD_NULL); + $token = $lexer->getTokenUnderCursor(); return new NullLiteralNode( - rangeInSource: $token->boundaries + rangeInSource: $token->rangeInSource ); } } diff --git a/src/Language/Parser/ParserException.php b/src/Language/Parser/ParserException.php index e58224f..048349a 100644 --- a/src/Language/Parser/ParserException.php +++ b/src/Language/Parser/ParserException.php @@ -22,16 +22,29 @@ namespace PackageFactory\ComponentEngine\Language\Parser; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; use PackageFactory\ComponentEngine\Parser\Source\Range; abstract class ParserException extends \Exception { + protected const TITLE = 'Parser failed'; + final protected function __construct( int $code, string $message, public readonly ?Range $affectedRangeInSource = null, ?\Exception $cause = null ) { - parent::__construct($message, $code, $cause); + parent::__construct(static::TITLE . ': ' . 
$message, $code, $cause); + } + + public static function becauseOfLexerException(LexerException $cause): static + { + return new static( + code: 1691238491, + message: $cause->getMessage(), + affectedRangeInSource: $cause->affectedRangeInSource, + cause: $cause + ); } } diff --git a/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php b/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php index 9a41d0c..18aef6b 100644 --- a/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php +++ b/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php @@ -26,11 +26,10 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\PropertyDeclaration\PropertyDeclarationNode; use PackageFactory\ComponentEngine\Language\AST\Node\PropertyDeclaration\PropertyNameNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; use PackageFactory\ComponentEngine\Language\Parser\TypeReference\TypeReferenceParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class PropertyDeclarationParser { @@ -38,32 +37,24 @@ final class PropertyDeclarationParser private ?TypeReferenceParser $typeReferenceParser = null; - /** - * @param \Iterator $tokens - * @return PropertyDeclarationNode - */ - public function parse(\Iterator &$tokens): PropertyDeclarationNode + public function parse(Lexer $lexer): PropertyDeclarationNode { - Scanner::assertType($tokens, TokenType::STRING); - $propertyNameToken = $tokens->current(); + $lexer->read(TokenType::WORD); + $propertyNameToken = $lexer->getTokenUnderCursor(); - Scanner::skipOne($tokens); - - Scanner::assertType($tokens, TokenType::COLON); - Scanner::skipOne($tokens); - 
- Scanner::skipSpace($tokens); + $lexer->read(TokenType::SYMBOL_COLON); + $lexer->skipSpace(); $this->typeReferenceParser ??= TypeReferenceParser::singleton(); - $typeReferenceNode = $this->typeReferenceParser->parse($tokens); + $typeReferenceNode = $this->typeReferenceParser->parse($lexer); return new PropertyDeclarationNode( rangeInSource: Range::from( - $propertyNameToken->boundaries->start, + $propertyNameToken->rangeInSource->start, $typeReferenceNode->rangeInSource->end ), name: new PropertyNameNode( - rangeInSource: $propertyNameToken->boundaries, + rangeInSource: $propertyNameToken->rangeInSource, value: PropertyName::from($propertyNameToken->value) ), type: $typeReferenceNode diff --git a/src/Language/Parser/StringLiteral/StringLiteralParser.php b/src/Language/Parser/StringLiteral/StringLiteralParser.php index fe6f1cb..8f4b105 100644 --- a/src/Language/Parser/StringLiteral/StringLiteralParser.php +++ b/src/Language/Parser/StringLiteral/StringLiteralParser.php @@ -24,29 +24,38 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Parser\Source\Range; final class StringLiteralParser { use Singleton; - /** - * @param \Iterator $tokens - * @return StringLiteralNode - */ - public function parse(\Iterator &$tokens): StringLiteralNode + public function parse(Lexer $lexer): StringLiteralNode { - Scanner::assertType($tokens, TokenType::STRING_QUOTED); + $lexer->read(TokenType::STRING_LITERAL_DELIMITER); + $start = $lexer->getStartPosition(); - $token = $tokens->current(); + $value = ''; + while 
(!$lexer->peek(TokenType::STRING_LITERAL_DELIMITER)) { + if ($lexer->probe(TokenType::STRING_LITERAL_CONTENT)) { + $value = $lexer->getTokenUnderCursor()->value; + } - Scanner::skipOne($tokens); + if ($lexer->probe(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER)) { + $value = $lexer->getTokenUnderCursor()->value; + } + break; + } + + + $lexer->read(TokenType::STRING_LITERAL_DELIMITER); + $end = $lexer->getEndPosition(); return new StringLiteralNode( - rangeInSource: $token->boundaries, - value: $token->value + rangeInSource: Range::from($start, $end), + value: $value ); } } diff --git a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php index 37c1496..57f88f8 100644 --- a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php +++ b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php @@ -27,11 +27,12 @@ use PackageFactory\ComponentEngine\Language\AST\Node\PropertyDeclaration\PropertyDeclarationNodes; use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructDeclarationNode; use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructNameNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\PropertyDeclaration\PropertyDeclarationParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; +use PackageFactory\ComponentEngine\Parser\Tokenizer\Token as TokenizerToken; final class StructDeclarationParser { @@ -39,104 +40,50 @@ final class StructDeclarationParser private ?PropertyDeclarationParser $propertyDeclarationParser = null; - /** - * @param 
\Iterator $tokens - * @return StructDeclarationNode - */ - public function parse(\Iterator &$tokens): StructDeclarationNode + public function parse(Lexer $lexer): StructDeclarationNode { - $structKeywordToken = $this->extractStructKeywordToken($tokens); - $structNameNode = $this->parseStructName($tokens); - $this->skipOpeningBracketToken($tokens); - $propertyDeclarationNodes = $this->parsePropertyDeclarations($tokens); - $closingBracketToken = $this->extractClosingBracketToken($tokens); + $lexer->read(TokenType::KEYWORD_STRUCT); + $start = $lexer->getStartPosition(); + $lexer->skipSpace(); + + $structNameNode = $this->parseStructName($lexer); + $propertyDeclarationNodes = $this->parsePropertyDeclarations($lexer); + $end = $lexer->getEndPosition(); return new StructDeclarationNode( - rangeInSource: Range::from( - $structKeywordToken->boundaries->start, - $closingBracketToken->boundaries->end - ), + rangeInSource: Range::from($start, $end), name: $structNameNode, properties: $propertyDeclarationNodes ); } - /** - * @param \Iterator $tokens - * @return Token - */ - public function extractStructKeywordToken(\Iterator &$tokens): Token + private function parseStructName(Lexer $lexer): StructNameNode { - Scanner::assertType($tokens, TokenType::KEYWORD_STRUCT); - - $structKeywordToken = $tokens->current(); + $lexer->read(TokenType::WORD); + $structNameToken = $lexer->getTokenUnderCursor(); - Scanner::skipOne($tokens); - Scanner::skipSpace($tokens); - - return $structKeywordToken; - } - - /** - * @param \Iterator $tokens - * @return StructNameNode - */ - public function parseStructName(\Iterator &$tokens): StructNameNode - { - Scanner::assertType($tokens, TokenType::STRING); - - $structNameToken = $tokens->current(); - - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); + $lexer->skipSpaceAndComments(); return new StructNameNode( - rangeInSource: $structNameToken->boundaries, + rangeInSource: $structNameToken->rangeInSource, value: 
StructName::from($structNameToken->value) ); } - /** - * @param \Iterator $tokens - * @return void - */ - public function skipOpeningBracketToken(\Iterator &$tokens): void - { - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_OPEN); - Scanner::skipOne($tokens); - Scanner::skipSpaceAndComments($tokens); - } - - /** - * @param \Iterator $tokens - * @return PropertyDeclarationNodes - */ - public function parsePropertyDeclarations(\Iterator &$tokens): PropertyDeclarationNodes + public function parsePropertyDeclarations(Lexer $lexer): PropertyDeclarationNodes { $this->propertyDeclarationParser ??= PropertyDeclarationParser::singleton(); + $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->skipSpaceAndComments(); + $items = []; - while (Scanner::type($tokens) === TokenType::STRING) { - assert($this->propertyDeclarationParser !== null); - $items[] = $this->propertyDeclarationParser->parse($tokens); - Scanner::skipSpaceAndComments($tokens); + while (!$lexer->probe(TokenType::BRACKET_CURLY_CLOSE)) { + $lexer->expect(TokenType::WORD); + $items[] = $this->propertyDeclarationParser->parse($lexer); + $lexer->skipSpaceAndComments(); } return new PropertyDeclarationNodes(...$items); } - - /** - * @param \Iterator $tokens - * @return Token - */ - public function extractClosingBracketToken(\Iterator &$tokens): Token - { - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_CLOSE); - - $closingBracketToken = $tokens->current(); - - Scanner::skipOne($tokens); - - return $closingBracketToken; - } } diff --git a/src/Language/Parser/Tag/TagParser.php b/src/Language/Parser/Tag/TagParser.php index ce63ed0..6895332 100644 --- a/src/Language/Parser/Tag/TagParser.php +++ b/src/Language/Parser/Tag/TagParser.php @@ -22,6 +22,7 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Tag; +use LogicException; use PackageFactory\ComponentEngine\Domain\AttributeName\AttributeName; use PackageFactory\ComponentEngine\Domain\TagName\TagName; use 
PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; @@ -33,131 +34,97 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Tag\ChildNodes; use PackageFactory\ComponentEngine\Language\AST\Node\Tag\TagNameNode; use PackageFactory\ComponentEngine\Language\AST\Node\Tag\TagNode; +use PackageFactory\ComponentEngine\Language\AST\Node\Text\TextNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\Text\TextParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; final class TagParser { use Singleton; + private static TokenTypes $TOKEN_TYPES_ATTRIBUTE_DELIMITERS; + private ?StringLiteralParser $stringLiteralParser = null; private ?TextParser $textParser = null; private ?ExpressionParser $expressionParser = null; - /** - * @param \Iterator $tokens - * @return TagNode - */ - public function parse(\Iterator &$tokens): TagNode + private function __construct() + { + self::$TOKEN_TYPES_ATTRIBUTE_DELIMITERS ??= TokenTypes::from( + TokenType::STRING_LITERAL_DELIMITER, + TokenType::BRACKET_CURLY_OPEN + ); + } + + public function parse(Lexer $lexer): TagNode { - $tagStartOpeningToken = $this->extractTagStartOpeningToken($tokens); - $tagNameNode = $this->parseTagName($tokens); - $attributeNodes = $this->parseAttributes($tokens); + $lexer->read(TokenType::BRACKET_ANGLE_OPEN); + $start = $lexer->getStartPosition(); + + $name = $this->parseName($lexer); + 
$attributes = $this->parseAttributes($lexer); + + if ($lexer->probe(TokenType::SYMBOL_SLASH_FORWARD)) { + $lexer->read(TokenType::BRACKET_ANGLE_CLOSE); + $end = $lexer->getEndPosition(); - if ($tagSelfCloseToken = $this->extractTagSelfCloseToken($tokens)) { return new TagNode( - rangeInSource: Range::from( - $tagStartOpeningToken->boundaries->start, - $tagSelfCloseToken->boundaries->end - ), - name: $tagNameNode, - attributes: $attributeNodes, + rangeInSource: Range::from($start, $end), + name: $name, + attributes: $attributes, children: new ChildNodes(), isSelfClosing: true ); - } else { - $this->skipTagEndToken($tokens); - $children = $this->parseChildren($tokens); - $this->skipTagStartClosingToken($tokens); - $this->assertAndSkipClosingTagName($tokens, $tagNameNode); - $closingTagEndToken = $this->extractTagEndToken($tokens); - - return new TagNode( - rangeInSource: Range::from( - $tagStartOpeningToken->boundaries->start, - $closingTagEndToken->boundaries->end - ), - name: $tagNameNode, - attributes: $attributeNodes, - children: $children, - isSelfClosing: false - ); } - } - /** - * @param \Iterator $tokens - * @return Token - */ - private function extractTagStartOpeningToken(\Iterator &$tokens): Token - { - Scanner::assertType($tokens, TokenType::TAG_START_OPENING); - $tagStartOpeningToken = $tokens->current(); - Scanner::skipOne($tokens); + $lexer->read(TokenType::BRACKET_ANGLE_CLOSE); + $children = $this->parseChildren($lexer); + + $this->readClosingTagName($lexer, $name->value); + $end = $lexer->getEndPosition(); - return $tagStartOpeningToken; + return new TagNode( + rangeInSource: Range::from($start, $end), + name: $name, + attributes: $attributes, + children: $children, + isSelfClosing: false + ); } - /** - * @param \Iterator $tokens - * @return TagNameNode - */ - private function parseTagName(\Iterator &$tokens): TagNameNode + private function parseName(Lexer $lexer): TagNameNode { - Scanner::assertType($tokens, TokenType::STRING); - $tagNameToken = 
$tokens->current(); - Scanner::skipOne($tokens); + $lexer->read(TokenType::WORD); + $tagNameToken = $lexer->getTokenUnderCursor(); + + $lexer->skipSpace(); return new TagNameNode( - rangeInSource: $tagNameToken->boundaries, + rangeInSource: $tagNameToken->rangeInSource, value: TagName::from($tagNameToken->value) ); } - /** - * @param \Iterator $tokens - * @return AttributeNodes - */ - private function parseAttributes(\Iterator &$tokens): AttributeNodes + private function parseAttributes(Lexer $lexer): AttributeNodes { $items = []; - while (!$this->isTagEnd($tokens)) { - Scanner::skipSpace($tokens); - - $items[] = $this->parseAttribute($tokens); - - Scanner::skipSpace($tokens); + while ($lexer->peek(TokenType::WORD)) { + $items[] = $this->parseAttribute($lexer); + $lexer->skipSpace(); } return new AttributeNodes(...$items); } - /** - * @param \Iterator $tokens - * @return boolean - */ - private function isTagEnd(\Iterator $tokens): bool - { - return ( - Scanner::type($tokens) === TokenType::TAG_END || - Scanner::type($tokens) === TokenType::TAG_SELF_CLOSE - ); - } - - /** - * @param \Iterator $tokens - * @return AttributeNode - */ - private function parseAttribute(\Iterator &$tokens): AttributeNode + private function parseAttribute(Lexer $lexer): AttributeNode { - $attributeNameNode = $this->parseAttributeName($tokens); - $attributeValueNode = $this->parseAttributeValue($tokens); + $attributeNameNode = $this->parseAttributeName($lexer); + $attributeValueNode = $this->parseAttributeValue($lexer); return new AttributeNode( rangeInSource: Range::from( @@ -170,187 +137,100 @@ private function parseAttribute(\Iterator &$tokens): AttributeNode ); } - /** - * @param \Iterator $tokens - * @return AttributeNameNode - */ - private function parseAttributeName(\Iterator &$tokens): AttributeNameNode + private function parseAttributeName(Lexer $lexer): AttributeNameNode { - Scanner::assertType($tokens, TokenType::STRING); - $attributeNameToken = $tokens->current(); - 
Scanner::skipOne($tokens); + $lexer->read(TokenType::WORD); + $attributeNameToken = $lexer->getTokenUnderCursor(); return new AttributeNameNode( - rangeInSource: $attributeNameToken->boundaries, + rangeInSource: $attributeNameToken->rangeInSource, value: AttributeName::from($attributeNameToken->value) ); } - /** - * @param \Iterator $tokens - * @return null|StringLiteralNode|ExpressionNode - */ - private function parseAttributeValue(\Iterator &$tokens): null|StringLiteralNode|ExpressionNode + private function parseAttributeValue(Lexer $lexer): null|StringLiteralNode|ExpressionNode { - if (Scanner::type($tokens) === TokenType::EQUALS) { - Scanner::skipOne($tokens); - - return match (Scanner::type($tokens)) { - TokenType::STRING_QUOTED => - $this->parseString($tokens), + if ($lexer->probe(TokenType::SYMBOL_EQUALS)) { + return match ($lexer->expectOneOf(self::$TOKEN_TYPES_ATTRIBUTE_DELIMITERS)) { + TokenType::STRING_LITERAL_DELIMITER => + $this->parseString($lexer), TokenType::BRACKET_CURLY_OPEN => - $this->parseExpression($tokens), - default => throw TagCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from( - TokenType::STRING_QUOTED, - TokenType::BRACKET_CURLY_OPEN - ), - actualToken: $tokens->current() - ) + $this->parseExpression($lexer), + default => throw new LogicException() }; } return null; } - /** - * @param \Iterator $tokens - * @return StringLiteralNode - */ - private function parseString(\Iterator &$tokens): StringLiteralNode + private function parseString(Lexer $lexer): StringLiteralNode { $this->stringLiteralParser ??= StringLiteralParser::singleton(); - return $this->stringLiteralParser->parse($tokens); + return $this->stringLiteralParser->parse($lexer); } - /** - * @param \Iterator $tokens - * @return ExpressionNode - */ - private function parseExpression(\Iterator &$tokens): ExpressionNode + private function parseExpression(Lexer $lexer): ExpressionNode { - $this->expressionParser ??= new ExpressionParser( - stopAt: 
TokenType::BRACKET_CURLY_CLOSE - ); + $this->expressionParser ??= new ExpressionParser(); - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_OPEN); - Scanner::skipOne($tokens); + $lexer->read(TokenType::BRACKET_CURLY_OPEN); - $expressionNode = $this->expressionParser->parse($tokens); + $expressionNode = $this->expressionParser->parse($lexer); - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_CLOSE); - Scanner::skipOne($tokens); + $lexer->read(TokenType::BRACKET_CURLY_CLOSE); return $expressionNode; } - /** - * @param \Iterator $tokens - * @return null|Token - */ - private function extractTagSelfCloseToken(\Iterator &$tokens): ?Token - { - if (Scanner::type($tokens) === TokenType::TAG_SELF_CLOSE) { - $tagSelfCloseToken = $tokens->current(); - Scanner::skipOne($tokens); - - return $tagSelfCloseToken; - } - - return null; - } - - /** - * @param \Iterator $tokens - * @return void - */ - private function skipTagEndToken(\Iterator &$tokens): void - { - Scanner::assertType($tokens, TokenType::TAG_END); - Scanner::skipOne($tokens); - } - - /** - * @param \Iterator $tokens - * @return ChildNodes - */ - private function parseChildren(\Iterator &$tokens): ChildNodes + private function parseChildren(Lexer $lexer): ChildNodes { $items = []; $preserveLeadingSpace = false; - while (Scanner::type($tokens) !== TokenType::TAG_START_CLOSING) { - $this->textParser ??= TextParser::singleton(); - if ($textNode = $this->textParser->parse($tokens, $preserveLeadingSpace)) { - $items[] = $textNode; - } - if (Scanner::type($tokens) === TokenType::TAG_START_OPENING) { - $items[] = $this->parse($tokens); - $preserveLeadingSpace = Scanner::type($tokens) !== TokenType::END_OF_LINE; + while (!$lexer->peek(TokenType::SYMBOL_CLOSE_TAG)) { + if ($lexer->peek(TokenType::BRACKET_ANGLE_OPEN)) { + $items[] = $this->parse($lexer); + $preserveLeadingSpace = !$lexer->peek(TokenType::END_OF_LINE); continue; } - if (Scanner::type($tokens) === TokenType::BRACKET_CURLY_OPEN) { - $items[] = 
$this->parseExpression($tokens); - $preserveLeadingSpace = Scanner::type($tokens) !== TokenType::END_OF_LINE; + if ($lexer->peek(TokenType::BRACKET_CURLY_OPEN)) { + $items[] = $this->parseExpression($lexer); + $preserveLeadingSpace = !$lexer->peek(TokenType::END_OF_LINE); continue; } - if (Scanner::type($tokens) !== TokenType::TAG_START_CLOSING) { - throw TagCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from( - TokenType::TAG_START_OPENING, - TokenType::TAG_START_CLOSING, - TokenType::BRACKET_CURLY_OPEN - ), - actualToken: $tokens->current() - ); + if ($textNode = $this->parseText($lexer, $preserveLeadingSpace)) { + $items[] = $textNode; } } return new ChildNodes(...$items); } - /** - * @param \Iterator $tokens - * @return void - */ - private function skipTagStartClosingToken(\Iterator &$tokens): void + private function parseText(Lexer $lexer, bool $preserveLeadingSpace): ?TextNode { - Scanner::assertType($tokens, TokenType::TAG_START_CLOSING); - Scanner::skipOne($tokens); + $this->textParser ??= TextParser::singleton(); + return $this->textParser->parse($lexer, $preserveLeadingSpace); } - /** - * @param \Iterator $tokens - * @param TagNameNode $openingTagNameNode - * @return void - */ - private function assertAndSkipClosingTagName(\Iterator &$tokens, TagNameNode $openingTagNameNode): void + private function readClosingTagName(Lexer $lexer, TagName $expectedName): void { - Scanner::assertType($tokens, TokenType::STRING); - $tagNameToken = $tokens->current(); - Scanner::skipOne($tokens); + $lexer->read(TokenType::SYMBOL_CLOSE_TAG); + $start = $lexer->getStartPosition(); + + $lexer->read(TokenType::WORD); + $closingNameToken = $lexer->getTokenUnderCursor(); - if ($tagNameToken->value !== $openingTagNameNode->value->value) { + $lexer->read(TokenType::BRACKET_ANGLE_CLOSE); + $end = $lexer->getEndPosition(); + + if ($closingNameToken->value !== $expectedName->value) { throw TagCouldNotBeParsed::becauseOfClosingTagNameMismatch( - 
expectedTagName: $openingTagNameNode->value, - actualTagName: $tagNameToken->value, - affectedRangeInSource: $tagNameToken->boundaries + expectedTagName: $expectedName, + actualTagName: $closingNameToken->value, + affectedRangeInSource: Range::from($start, $end) ); } } - - /** - * @param \Iterator $tokens - * @return Token - */ - private function extractTagEndToken(\Iterator &$tokens): Token - { - Scanner::assertType($tokens, TokenType::TAG_END); - $tagEndToken = $tokens->current(); - Scanner::skipOne($tokens); - - return $tagEndToken; - } } diff --git a/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php b/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php index 509f017..0dee8d6 100644 --- a/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php +++ b/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php @@ -24,14 +24,15 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralExpressionSegmentNode; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralLine; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralLines; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralNode; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralSegments; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralStringSegmentNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class 
TemplateLiteralParser { @@ -39,93 +40,90 @@ final class TemplateLiteralParser private ?ExpressionParser $expressionParser = null; - /** - * @param \Iterator $tokens - * @return TemplateLiteralNode - */ - public function parse(\Iterator &$tokens): TemplateLiteralNode + public function parse(Lexer $lexer): TemplateLiteralNode { - Scanner::assertType($tokens, TokenType::TEMPLATE_LITERAL_START); - $startingDelimiterToken = $tokens->current(); - Scanner::skipOne($tokens); + $lexer->read(TokenType::TEMPLATE_LITERAL_DELIMITER); + $start = $lexer->getStartPosition(); - $segments = $this->parseSegments($tokens); + $lines = $this->parseLines($lexer); - Scanner::assertType($tokens, TokenType::TEMPLATE_LITERAL_END); - $finalDelimiterToken = $tokens->current(); - Scanner::skipOne($tokens); + $lexer->read(TokenType::TEMPLATE_LITERAL_DELIMITER); + $end = $lexer->getEndPosition(); return new TemplateLiteralNode( - rangeInSource: Range::from( - $startingDelimiterToken->boundaries->start, - $finalDelimiterToken->boundaries->end - ), + rangeInSource: Range::from($start, $end), + indentation: $lexer->getStartPosition()->columnNumber, + lines: $lines + ); + } + + public function parseLines(Lexer $lexer): TemplateLiteralLines + { + $lexer->read(TokenType::END_OF_LINE); + $lexer->probe(TokenType::SPACE); + + $items = []; + while (!$lexer->peek(TokenType::TEMPLATE_LITERAL_DELIMITER)) { + $items[] = $this->parseLine($lexer); + $lexer->read(TokenType::END_OF_LINE); + $lexer->probe(TokenType::SPACE); + } + + return new TemplateLiteralLines(...$items); + } + + public function parseLine(Lexer $lexer): TemplateLiteralLine + { + $segments = $this->parseSegments($lexer); + $indentation = $segments->items[0]?->rangeInSource->start->columnNumber ?? 
0; + + return new TemplateLiteralLine( + indentation: $indentation, segments: $segments ); } - /** - * @param \Iterator $tokens - * @return TemplateLiteralSegments - */ - public function parseSegments(\Iterator &$tokens): TemplateLiteralSegments + public function parseSegments(Lexer $lexer): TemplateLiteralSegments { $items = []; - while (Scanner::type($tokens) !== TokenType::TEMPLATE_LITERAL_END) { - $items[] = match (Scanner::type($tokens)) { - TokenType::STRING_QUOTED => $this->parseStringSegment($tokens), - TokenType::DOLLAR => $this->parseExpressionSegment($tokens), - default => throw new \Exception(__METHOD__ . ' for ' . Scanner::type($tokens)->value . ' is not implemented yet!') - }; + while (!$lexer->peek(TokenType::END_OF_LINE)) { + if ($lexer->peek(TokenType::BRACKET_CURLY_OPEN)) { + $items[] = $this->parseExpressionSegment($lexer); + continue; + } + $items[] = $this->parseStringSegment($lexer); } return new TemplateLiteralSegments(...$items); } - /** - * @param \Iterator $tokens - * @return TemplateLiteralStringSegmentNode - */ - public function parseStringSegment(\Iterator &$tokens): TemplateLiteralStringSegmentNode + public function parseStringSegment(Lexer $lexer): TemplateLiteralStringSegmentNode { - Scanner::assertType($tokens, TokenType::STRING_QUOTED); - $stringToken = $tokens->current(); - Scanner::skipOne($tokens); + $lexer->read(TokenType::TEMPLATE_LITERAL_CONTENT); + $stringToken = $lexer->getTokenUnderCursor(); return new TemplateLiteralStringSegmentNode( - rangeInSource: $stringToken->boundaries, + rangeInSource: $stringToken->rangeInSource, value: $stringToken->value ); } - /** - * @param \Iterator $tokens - * @return TemplateLiteralExpressionSegmentNode - */ - public function parseExpressionSegment(\Iterator &$tokens): TemplateLiteralExpressionSegmentNode + public function parseExpressionSegment(Lexer $lexer): TemplateLiteralExpressionSegmentNode { - $this->expressionParser ??= new ExpressionParser( - stopAt: TokenType::BRACKET_CURLY_CLOSE 
- ); - - Scanner::assertType($tokens, TokenType::DOLLAR); - $dollarToken = $tokens->current(); - Scanner::skipOne($tokens); + $this->expressionParser ??= new ExpressionParser(); - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_OPEN); - Scanner::skipOne($tokens); + $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $start = $lexer->getStartPosition(); + $lexer->skipSpaceAndComments(); - $expression = $this->expressionParser->parse($tokens); + $expression = $this->expressionParser->parse($lexer); - Scanner::assertType($tokens, TokenType::BRACKET_CURLY_CLOSE); - $closingBracketToken = $tokens->current(); - Scanner::skipOne($tokens); + $lexer->skipSpaceAndComments(); + $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $end = $lexer->getEndPosition(); return new TemplateLiteralExpressionSegmentNode( - rangeInSource: Range::from( - $dollarToken->boundaries->start, - $closingBracketToken->boundaries->end - ), + rangeInSource: Range::from($start, $end), expression: $expression ); } diff --git a/src/Language/Parser/Text/TextParser.php b/src/Language/Parser/Text/TextParser.php index 25ef70c..3497dff 100644 --- a/src/Language/Parser/Text/TextParser.php +++ b/src/Language/Parser/Text/TextParser.php @@ -24,136 +24,91 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\Text\TextNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class TextParser { use Singleton; - private string $value; + private static TokenTypes $TOKEN_TYPES_END_DELIMITERS; + private 
static TokenTypes $TOKEN_TYPES_CONTENT; - private ?Token $startingToken; - private ?Token $finalToken; - - private bool $trimLeadingSpace; - private bool $trimTrailingSpace; - private bool $currentlyCapturingSpace; - private bool $trailingSpaceContainsLineBreak; - private bool $terminated; - - /** - * @param \Iterator $tokens - * @param boolean $preserveLeadingSpace - * @return null|TextNode - */ - public function parse(\Iterator &$tokens, bool $preserveLeadingSpace = false): ?TextNode + private function __construct() { - $this->reset($preserveLeadingSpace); - - while (!Scanner::isEnd($tokens) && !$this->terminated) { - $this->startingToken ??= $tokens->current(); - - match (Scanner::type($tokens)) { - TokenType::BRACKET_CURLY_OPEN, - TokenType::TAG_START_OPENING => - $this->terminateAtAdjacentChildNode(), - TokenType::TAG_START_CLOSING => - $this->terminateAtClosingTag(), - TokenType::SPACE => - $this->captureSpace($tokens->current()), - TokenType::END_OF_LINE => - $this->captureLineBreak($tokens->current()), - default => - $this->captureText($tokens->current()), - }; - - if (!$this->terminated) { - Scanner::skipOne($tokens); - } - } - - return $this->build(); - } - - private function reset(bool $preserveLeadingSpace): void - { - $this->value = ''; - - $this->startingToken = null; - $this->finalToken = null; - - $this->trimLeadingSpace = !$preserveLeadingSpace; - $this->trimTrailingSpace = true; - $this->currentlyCapturingSpace = false; - $this->trailingSpaceContainsLineBreak = false; - $this->terminated = false; - } - - private function terminateAtAdjacentChildNode(): void - { - $this->terminated = true; - $this->trimTrailingSpace = $this->trailingSpaceContainsLineBreak; - } - - private function terminateAtClosingTag(): void - { - $this->terminated = true; + self::$TOKEN_TYPES_END_DELIMITERS = TokenTypes::from( + TokenType::SYMBOL_CLOSE_TAG, + TokenType::BRACKET_ANGLE_OPEN, + TokenType::BRACKET_CURLY_OPEN + ); + self::$TOKEN_TYPES_CONTENT = TokenTypes::from( + 
TokenType::SPACE, + TokenType::END_OF_LINE, + TokenType::TEXT + ); } - private function captureSpace(Token $token): void + public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNode { - $this->finalToken = $token; + /** @var null|Position $start */ + $start = null; + $hasLeadingSpace = false; - if ($this->currentlyCapturingSpace) { - return; + if ($lexer->probe(TokenType::SPACE)) { + $start = $lexer->getStartPosition(); + $hasLeadingSpace = true; } - $this->currentlyCapturingSpace = true; - $this->value .= ' '; - } + if ($lexer->probe(TokenType::END_OF_LINE)) { + $start ??= $lexer->getStartPosition(); + $hasLeadingSpace = false; + } - private function captureLineBreak(Token $token): void - { - $this->captureSpace($token); - $this->trailingSpaceContainsLineBreak = true; - } + $lexer->skipSpace(); + if ($lexer->isEnd() || $lexer->peekOneOf(self::$TOKEN_TYPES_END_DELIMITERS)) { + return null; + } - private function captureText(Token $token): void - { - $this->finalToken = $token; - $this->currentlyCapturingSpace = false; - $this->trailingSpaceContainsLineBreak = false; + $hasTrailingSpace = false; + $trailingSpaceContainsLineBreaks = false; + $value = $hasLeadingSpace && $preserveLeadingSpace ? 
' ' : ''; + while (!$lexer->isEnd() && !$lexer->peekOneOf(self::$TOKEN_TYPES_END_DELIMITERS)) { + $lexer->readOneOf(self::$TOKEN_TYPES_CONTENT); + + if ($lexer->getTokenTypeUnderCursor() === TokenType::TEXT) { + $start ??= $lexer->getStartPosition(); + if ($hasTrailingSpace) { + $value .= ' '; + $hasTrailingSpace = false; + $trailingSpaceContainsLineBreaks = false; + } + $value .= $lexer->getTokenUnderCursor()->value; + continue; + } - $this->value .= $token->value; - } + if ($lexer->getTokenTypeUnderCursor() === TokenType::END_OF_LINE) { + $trailingSpaceContainsLineBreaks = true; + } - private function build(): ?TextNode - { - if (is_null($this->startingToken) || is_null($this->finalToken)) { - return null; + $hasTrailingSpace = true; } - if ($this->trimLeadingSpace) { - $this->value = ltrim($this->value); + if ($start === null) { + return null; } - if ($this->trimTrailingSpace) { - $this->value = rtrim($this->value); - } + $end = $lexer->getEndPosition(); - if ($this->value === '' || $this->value === ' ') { - return null; + if ($hasTrailingSpace && !$trailingSpaceContainsLineBreaks && !$lexer->isEnd() && !$lexer->peek(TokenType::SYMBOL_CLOSE_TAG)) { + $value .= ' '; } return new TextNode( - rangeInSource: Range::from( - $this->startingToken->boundaries->start, - $this->finalToken->boundaries->end - ), - value: $this->value + rangeInSource: Range::from($start, $end), + value: $value ); } } diff --git a/src/Language/Parser/TypeReference/TypeReferenceParser.php b/src/Language/Parser/TypeReference/TypeReferenceParser.php index f4fa734..c11afab 100644 --- a/src/Language/Parser/TypeReference/TypeReferenceParser.php +++ b/src/Language/Parser/TypeReference/TypeReferenceParser.php @@ -29,39 +29,29 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNode; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeReferenceNode; +use 
PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; final class TypeReferenceParser { use Singleton; - /** - * @param \Iterator $tokens - * @return TypeReferenceNode - */ - public function parse(\Iterator &$tokens): TypeReferenceNode - { - $startingToken = $tokens->current(); - $questionmarkToken = $this->extractQuestionmarkToken($tokens); - $isOptional = !is_null($questionmarkToken); - - $typeNameNodes = $this->parseTypeNames($tokens); - - $closingArrayToken = $this->extractClosingArrayToken($tokens); - $isArray = !is_null($closingArrayToken); + private ?Position $start = null; - $rangeInSource = Range::from( - $startingToken->boundaries->start, - $closingArrayToken?->boundaries->end - ?? 
$typeNameNodes->getLast()->rangeInSource->end - ); + public function parse(Lexer $lexer): TypeReferenceNode + { + $this->start = null; + $isOptional = $lexer->probe(TokenType::SYMBOL_QUESTIONMARK); + $this->start = $lexer->getStartPosition(); + $typeNameNodes = $this->parseTypeNames($lexer); + $isArray = $this->parseIsArray($lexer); + $end = $lexer->getEndPosition(); try { return new TypeReferenceNode( - rangeInSource: $rangeInSource, + rangeInSource: Range::from($this->start, $end), names: $typeNameNodes, isArray: $isArray, isOptional: $isOptional @@ -71,37 +61,15 @@ public function parse(\Iterator &$tokens): TypeReferenceNode } } - /** - * @param \Iterator $tokens - * @return Token - */ - public function extractQuestionmarkToken(\Iterator &$tokens): ?Token - { - if (Scanner::type($tokens) === TokenType::QUESTIONMARK) { - $questionmarkToken = $tokens->current(); - Scanner::skipOne($tokens); - - return $questionmarkToken; - } - - return null; - } - - /** - * @param \Iterator $tokens - * @return TypeNameNodes - */ - public function parseTypeNames(\Iterator &$tokens): TypeNameNodes + public function parseTypeNames(Lexer $lexer): TypeNameNodes { $items = []; while (true) { - $items[] = $this->parseTypeName($tokens); + $items[] = $this->parseTypeName($lexer); - if (Scanner::isEnd($tokens) || Scanner::type($tokens) !== TokenType::PIPE) { + if ($lexer->isEnd() || !$lexer->probe(TokenType::SYMBOL_PIPE)) { break; } - - Scanner::skipOne($tokens); } try { @@ -111,41 +79,29 @@ public function parseTypeNames(\Iterator &$tokens): TypeNameNodes } } - /** - * @param \Iterator $tokens - * @return TypeNameNode - */ - public function parseTypeName(\Iterator &$tokens): TypeNameNode + public function parseTypeName(Lexer $lexer): TypeNameNode { - Scanner::assertType($tokens, TokenType::STRING); - - $typeNameToken = $tokens->current(); - - Scanner::skipOne($tokens); + $lexer->read(TokenType::WORD); + $this->start ??= $lexer->getStartPosition(); + $typeNameToken = 
$lexer->getTokenUnderCursor(); return new TypeNameNode( - rangeInSource: $typeNameToken->boundaries, + rangeInSource: $typeNameToken->rangeInSource, value: TypeName::from($typeNameToken->value) ); } - /** - * @param \Iterator $tokens - * @return Token - */ - public function extractClosingArrayToken(\Iterator &$tokens): ?Token + public function parseIsArray(Lexer $lexer): bool { - if (!Scanner::isEnd($tokens) && Scanner::type($tokens) === TokenType::BRACKET_SQUARE_OPEN) { - Scanner::skipOne($tokens); - Scanner::assertType($tokens, TokenType::BRACKET_SQUARE_CLOSE); - - $closingArrayToken = $tokens->current(); - - Scanner::skipOne($tokens); + if ($lexer->isEnd()) { + return false; + } - return $closingArrayToken; + if ($lexer->probe(TokenType::BRACKET_SQUARE_OPEN)) { + $lexer->read(TokenType::BRACKET_SQUARE_CLOSE); + return true; } - return null; + return false; } } diff --git a/src/Language/Parser/ValueReference/ValueReferenceParser.php b/src/Language/Parser/ValueReference/ValueReferenceParser.php index f955647..d00e891 100644 --- a/src/Language/Parser/ValueReference/ValueReferenceParser.php +++ b/src/Language/Parser/ValueReference/ValueReferenceParser.php @@ -25,28 +25,20 @@ use PackageFactory\ComponentEngine\Domain\VariableName\VariableName; use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Scanner; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; final class ValueReferenceParser { use Singleton; - /** - * @param \Iterator $tokens - * @return ValueReferenceNode - */ - public function parse(\Iterator &$tokens): ValueReferenceNode + public function parse(Lexer $lexer): ValueReferenceNode { - 
Scanner::assertType($tokens, TokenType::STRING); - - $token = $tokens->current(); - - Scanner::skipOne($tokens); + $lexer->read(TokenType::WORD); + $token = $lexer->getTokenUnderCursor(); return new ValueReferenceNode( - rangeInSource: $token->boundaries, + rangeInSource: $token->rangeInSource, name: VariableName::from($token->value) ); } diff --git a/src/Language/Util/DebugHelper.php b/src/Language/Util/DebugHelper.php index 6382e86..7c45808 100644 --- a/src/Language/Util/DebugHelper.php +++ b/src/Language/Util/DebugHelper.php @@ -22,6 +22,17 @@ namespace PackageFactory\ComponentEngine\Language\Util; +use PackageFactory\ComponentEngine\Language\AST\Node\BinaryOperation\BinaryOperationNode; +use PackageFactory\ComponentEngine\Language\AST\Node\Expression\ExpressionNode; +use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; +use PackageFactory\ComponentEngine\Language\AST\Node\Node; +use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; +use PackageFactory\ComponentEngine\Language\AST\Node\Tag\TagNode; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralExpressionSegmentNode; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralNode; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralStringSegmentNode; +use PackageFactory\ComponentEngine\Language\AST\Node\TernaryOperation\TernaryOperationNode; +use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; @@ -120,4 +131,80 @@ public static function describeToken(Token $token): string { return sprintf('%s ("%s")', $token->type->value, $token->value); } + + public static function printASTNode(Node $node, string $indentation = ''): 
string + { + return $indentation . match ($node::class) { + BinaryOperationNode::class => self::printBinaryOperationNode($node, $indentation), + ExpressionNode::class => self::printExpressionNode($node, $indentation), + IntegerLiteralNode::class => self::printIntegerLiteralNode($node, $indentation), + StringLiteralNode::class => self::printStringLiteralNode($node, $indentation), + TagNode::class => self::printTagNode($node, $indentation), + TemplateLiteralNode::class => self::printTemplateLiteralNode($node, $indentation), + TernaryOperationNode::class => self::printTernaryOperationNode($node, $indentation), + ValueReferenceNode::class => self::printValueReferenceNode($node, $indentation), + default => throw new \Exception(__METHOD__ . ' is not implemented yet for: ' . $node::class) + }; + } + + public static function printBinaryOperationNode(BinaryOperationNode $node, string $indentation = ''): string + { + $left = self::printASTNode($node->leftOperand, $indentation . ' '); + $right = self::printASTNode($node->rightOperand, $indentation . ' '); + $op = $indentation . ' ' . $node->operator->name; + + return $indentation . 'BinaryOperation' . PHP_EOL . $left . PHP_EOL . $op . PHP_EOL . $right; + } + + public static function printExpressionNode(ExpressionNode $node, string $indentation = ''): string + { + return $indentation . 'Expression' . PHP_EOL . self::printASTNode($node->root, $indentation . ' '); + } + + public static function printIntegerLiteralNode(IntegerLiteralNode $node, string $indentation = ''): string + { + return $indentation . 'IntegerLiteral (format=' . $node->format->name . ')' . $node->value; + } + + public static function printStringLiteralNode(StringLiteralNode $node, string $indentation = ''): string + { + return $indentation . 'StringLiteral "' . substr(addslashes($node->value), 0, 64 - strlen($indentation)) . 
'"'; + } + + public static function printTemplateLiteralNode(TemplateLiteralNode $node, string $indentation = ''): string + { + $lines = []; + foreach ($node->lines->items as $line) { + $segments = []; + foreach ($line->segments->items as $segment) { + $segments[] = match ($segment::class) { + TemplateLiteralStringSegmentNode::class => $indentation . ' "' . substr(addslashes($segment->value), 0, 64 - strlen($indentation)) . '"', + TemplateLiteralExpressionSegmentNode::class => self::printASTNode($segment->expression, $indentation . ' ') + }; + } + + $lines[] = $indentation . ' Line (indent=' . $line->indentation . ')' . PHP_EOL . join(PHP_EOL, $segments); + } + + return $indentation . 'TemplateLiteral (indent=' . $node->indentation . ')' . PHP_EOL . join(PHP_EOL, $lines) . PHP_EOL; + } + + public static function printTagNode(TagNode $node, string $indentation = ''): string + { + return $indentation . 'Tag <' . $node->name->value->value . '/>'; + } + + public static function printTernaryOperationNode(TernaryOperationNode $node, string $indentation = ''): string + { + $condition = self::printASTNode($node->condition, $indentation . ' '); + $true = self::printASTNode($node->trueBranch, $indentation . ' '); + $false = self::printASTNode($node->falseBranch, $indentation . ' '); + + return $indentation . 'TernaryOperation' . PHP_EOL . $condition . PHP_EOL . $true . PHP_EOL . $false; + } + + public static function printValueReferenceNode(ValueReferenceNode $node, string $indentation = ''): string + { + return $indentation . 'ValueReference ' . 
$node->name->value; + } } diff --git a/src/Module/Loader/ModuleFile/ModuleFileLoader.php b/src/Module/Loader/ModuleFile/ModuleFileLoader.php index 859fb41..677001a 100644 --- a/src/Module/Loader/ModuleFile/ModuleFileLoader.php +++ b/src/Module/Loader/ModuleFile/ModuleFileLoader.php @@ -22,13 +22,13 @@ namespace PackageFactory\ComponentEngine\Module\Loader\ModuleFile; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\Module\ModuleParser; use PackageFactory\ComponentEngine\Module\LoaderInterface; use PackageFactory\ComponentEngine\Module\ModuleId; use PackageFactory\ComponentEngine\Module\ModuleInterface; use PackageFactory\ComponentEngine\Parser\Source\Path; use PackageFactory\ComponentEngine\Parser\Source\Source; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Tokenizer; final class ModuleFileLoader implements LoaderInterface { @@ -43,13 +43,12 @@ public function loadModule(string $pathToModule): ModuleInterface Path::fromString($pathToModule) ); $source = Source::fromFile($pathToImportFrom->value); - $tokenizer = Tokenizer::fromSource($source); - $tokens = $tokenizer->getIterator(); + $lexer = new Lexer($source->contents); $moduleParser = ModuleParser::singleton(); $moduleId = ModuleId::fromSource($source); - $moduleNode = $moduleParser->parse($tokens); + $moduleNode = $moduleParser->parse($lexer); return new Module( diff --git a/src/Target/Php/Transpiler/BinaryOperation/BinaryOperationTranspiler.php b/src/Target/Php/Transpiler/BinaryOperation/BinaryOperationTranspiler.php index d39d3a0..246d746 100644 --- a/src/Target/Php/Transpiler/BinaryOperation/BinaryOperationTranspiler.php +++ b/src/Target/Php/Transpiler/BinaryOperation/BinaryOperationTranspiler.php @@ -36,6 +36,7 @@ public function __construct(private readonly ScopeInterface $scope) private function transpileBinaryOperator(BinaryOperator $binaryOperator): string { return match ($binaryOperator) { + BinaryOperator::NULLISH_COALESCE => '??', 
BinaryOperator::AND => '&&', BinaryOperator::OR => '||', BinaryOperator::EQUAL => '===', diff --git a/src/Target/Php/Transpiler/TemplateLiteral/TemplateLiteralTranspiler.php b/src/Target/Php/Transpiler/TemplateLiteral/TemplateLiteralTranspiler.php index 74c099d..9218416 100644 --- a/src/Target/Php/Transpiler/TemplateLiteral/TemplateLiteralTranspiler.php +++ b/src/Target/Php/Transpiler/TemplateLiteral/TemplateLiteralTranspiler.php @@ -38,16 +38,34 @@ public function __construct(private readonly ScopeInterface $scope) public function transpile(TemplateLiteralNode $templateLiteralNode): string { - $segments = []; + $lines = []; + $emptyLines = 0; + $isFirstLine = true; + foreach ($templateLiteralNode->lines->items as $line) { + if (count($line->segments->items) === 0) { + $emptyLines++; + continue; + } + + $segments = []; + foreach ($line->segments->items as $segmentNode) { + $segments[] = match ($segmentNode::class) { + TemplateLiteralStringSegmentNode::class => $this->transpileStringSegment($segmentNode), + TemplateLiteralExpressionSegmentNode::class => $this->transpileExpressionSegment($segmentNode) + }; + } + + $next = str_repeat(' ', $line->indentation - $templateLiteralNode->indentation) . join(' . ', $segments); + if (!$isFirstLine) { + $next = ' . "' . str_repeat('\n', $emptyLines + 1) . '" . ' . $next; + } - foreach ($templateLiteralNode->segments->items as $segmentNode) { - $segments[] = match ($segmentNode::class) { - TemplateLiteralStringSegmentNode::class => $this->transpileStringSegment($segmentNode), - TemplateLiteralExpressionSegmentNode::class => $this->transpileExpressionSegment($segmentNode) - }; + $lines[] = $next; + $emptyLines = 0; + $isFirstLine = false; } - return join(' . 
', $segments); + return join('', $lines); } private function transpileStringSegment(TemplateLiteralStringSegmentNode $segmentNode): string diff --git a/src/TypeSystem/Resolver/BinaryOperation/BinaryOperationTypeResolver.php b/src/TypeSystem/Resolver/BinaryOperation/BinaryOperationTypeResolver.php index b81798c..9f93bd1 100644 --- a/src/TypeSystem/Resolver/BinaryOperation/BinaryOperationTypeResolver.php +++ b/src/TypeSystem/Resolver/BinaryOperation/BinaryOperationTypeResolver.php @@ -40,6 +40,7 @@ public function __construct( public function resolveTypeOf(BinaryOperationNode $binaryOperationNode): TypeInterface { return match ($binaryOperationNode->operator) { + BinaryOperator::NULLISH_COALESCE, BinaryOperator::AND, BinaryOperator::OR => $this->resolveTypeOfBooleanOperation($binaryOperationNode), diff --git a/test/Integration/Examples/Numbers/Numbers.afx b/test/Integration/Examples/Numbers/Numbers.afx index 852a474..84566c0 100644 --- a/test/Integration/Examples/Numbers/Numbers.afx +++ b/test/Integration/Examples/Numbers/Numbers.afx @@ -8,7 +8,7 @@ export component Numbers { # Binary 0b10000000000000000000000000000000 || 0b01111111100000000000000000000000 || - 0B00000000011111111111111111111111 || + 0b00000000011111111111111111111111 || # Octal 0o755 || diff --git a/test/Integration/Examples/TemplateLiteral/TemplateLiteral.afx b/test/Integration/Examples/TemplateLiteral/TemplateLiteral.afx index 8f46752..bd187b4 100644 --- a/test/Integration/Examples/TemplateLiteral/TemplateLiteral.afx +++ b/test/Integration/Examples/TemplateLiteral/TemplateLiteral.afx @@ -3,21 +3,27 @@ export component TemplateLiteral { isActive: boolean someNumber: number - return `A template literal may contain ${expression}s. + return """ + A template literal may contain {expression}s. It can span multiple lines. Interpolated Expressions can be arbitrarily complex: - ${isActive ? 27 : 17} + {isActive ? 27 : 17} They can also contain other template literals: - ${isActive ? `Is 27? 
${someNumber === 27 ? "yes" : "no"}` : `Number is ${27}`} + {isActive ? """ + Is 27? {someNumber === 27 ? "yes" : "no"} + """ + : """ + Number is {27} + """} Even markup: - ${ + {

Number is {someNumber}

} - ` + """ } diff --git a/test/Integration/Examples/TemplateLiteral/TemplateLiteral.php b/test/Integration/Examples/TemplateLiteral/TemplateLiteral.php index a802f73..7df49c5 100644 --- a/test/Integration/Examples/TemplateLiteral/TemplateLiteral.php +++ b/test/Integration/Examples/TemplateLiteral/TemplateLiteral.php @@ -17,6 +17,6 @@ public function __construct( public function render(): string { - return 'A template literal may contain ' . $this->expression . 's.' . "\n\n" . ' It can span multiple lines.' . "\n\n" . ' Interpolated Expressions can be arbitrarily complex:' . "\n" . ' ' . ($this->isActive ? 27 : 17) . "\n\n" . ' They can also contain other template literals:' . "\n" . ' ' . ($this->isActive ? 'Is 27? ' . (($this->someNumber === 27) ? 'yes' : 'no') : 'Number is ' . 27) . "\n\n" . ' Even markup:' . "\n" . ' ' . '

Number is ' . $this->someNumber . '

' . "\n" . ' '; + return 'A template literal may contain ' . $this->expression . 's.' . "\n\n" . 'It can span multiple lines.' . "\n\n" . 'Interpolated Expressions can be arbitrarily complex:' . "\n" . ($this->isActive ? 27 : 17) . "\n\n" . 'They can also contain other template literals:' . "\n" . ($this->isActive ? 'Is 27? ' . (($this->someNumber === 27) ? 'yes' : 'no') : 'Number is ' . 27) . "\n\n" . 'Even markup:' . "\n" . '

Number is ' . $this->someNumber . '

'; } } diff --git a/test/Integration/PhpTranspilerIntegrationTest.php b/test/Integration/PhpTranspilerIntegrationTest.php index 12346c4..a025eb1 100644 --- a/test/Integration/PhpTranspilerIntegrationTest.php +++ b/test/Integration/PhpTranspilerIntegrationTest.php @@ -22,6 +22,7 @@ namespace PackageFactory\ComponentEngine\Test\Integration; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\Module\ModuleParser; use PackageFactory\ComponentEngine\Module\Loader\ModuleFile\ModuleFileLoader; use PackageFactory\ComponentEngine\Parser\Source\Path; @@ -67,10 +68,9 @@ public function testTranspiler(string $example): void { $sourcePath = Path::fromString(__DIR__ . '/Examples/' . $example . '/' . $example . '.afx'); $source = Source::fromFile($sourcePath->value); - $tokenizer = Tokenizer::fromSource($source); - $tokens = $tokenizer->getIterator(); + $lexer = new Lexer($source->contents); - $module = ModuleParser::singleton()->parse($tokens); + $module = ModuleParser::singleton()->parse($lexer); $expected = file_get_contents(__DIR__ . '/Examples/' . $example . '/' . $example . 
'.php'); diff --git a/test/Unit/Language/ASTNodeFixtures.php b/test/Unit/Language/ASTNodeFixtures.php index 93652b3..09a8ef6 100644 --- a/test/Unit/Language/ASTNodeFixtures.php +++ b/test/Unit/Language/ASTNodeFixtures.php @@ -43,6 +43,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeReferenceNode; use PackageFactory\ComponentEngine\Language\AST\Node\UnaryOperation\UnaryOperationNode; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\BooleanLiteral\BooleanLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\ComponentDeclaration\ComponentDeclarationParser; use PackageFactory\ComponentEngine\Language\Parser\EnumDeclaration\EnumDeclarationParser; @@ -59,7 +60,6 @@ use PackageFactory\ComponentEngine\Language\Parser\Text\TextParser; use PackageFactory\ComponentEngine\Language\Parser\TypeReference\TypeReferenceParser; use PackageFactory\ComponentEngine\Language\Parser\ValueReference\ValueReferenceParser; -use PackageFactory\ComponentEngine\Test\Unit\Parser\Tokenizer\Fixtures as TokenizerFixtures; final class ASTNodeFixtures { @@ -91,87 +91,87 @@ public static function BinaryOperation(string $sourceAsString): BinaryOperationN public static function BooleanLiteral(string $sourceAsString): BooleanLiteralNode { $booleanLiteralParser = BooleanLiteralParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $booleanLiteralParser->parse($tokens); + return $booleanLiteralParser->parse($lexer); } public static function ComponentDeclaration(string $sourceAsString): ComponentDeclarationNode { $componentDeclarationParser = ComponentDeclarationParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $componentDeclarationParser->parse($tokens); + return 
$componentDeclarationParser->parse($lexer); } public static function EnumDeclaration(string $sourceAsString): EnumDeclarationNode { $enumDeclarationParser = EnumDeclarationParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $enumDeclarationParser->parse($tokens); + return $enumDeclarationParser->parse($lexer); } public static function Expression(string $sourceAsString): ExpressionNode { $epxressionParser = new ExpressionParser(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $epxressionParser->parse($tokens); + return $epxressionParser->parse($lexer); } public static function IntegerLiteral(string $sourceAsString): IntegerLiteralNode { $integerLiteralParser = IntegerLiteralParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $integerLiteralParser->parse($tokens); + return $integerLiteralParser->parse($lexer); } public static function Match(string $sourceAsString): MatchNode { $matchParser = MatchParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $matchParser->parse($tokens); + return $matchParser->parse($lexer); } public static function Module(string $sourceAsString): ModuleNode { $moduleParser = ModuleParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $moduleParser->parse($tokens); + return $moduleParser->parse($lexer); } public static function NullLiteral(string $sourceAsString): NullLiteralNode { $nullLiteralParser = NullLiteralParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $nullLiteralParser->parse($tokens); + return $nullLiteralParser->parse($lexer); } public static function PropertyDeclaration(string $sourceAsString): 
PropertyDeclarationNode { $propertyDeclarationParser = PropertyDeclarationParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $propertyDeclarationParser->parse($tokens); + return $propertyDeclarationParser->parse($lexer); } public static function StringLiteral(string $sourceAsString): StringLiteralNode { $stringLiteralParser = StringLiteralParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $stringLiteralParser->parse($tokens); + return $stringLiteralParser->parse($lexer); } public static function StructDeclaration(string $sourceAsString): StructDeclarationNode { $structDeclarationParser = StructDeclarationParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $tokens = new Lexer($sourceAsString); return $structDeclarationParser->parse($tokens); } @@ -179,9 +179,9 @@ public static function StructDeclaration(string $sourceAsString): StructDeclarat public static function Tag(string $sourceAsString): TagNode { $tagParser = TagParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $tagParser->parse($tokens); + return $tagParser->parse($lexer); } public static function TagContent(string $sourceAsString): null|TextNode|ExpressionNode|TagNode @@ -194,9 +194,9 @@ public static function TagContent(string $sourceAsString): null|TextNode|Express public static function TemplateLiteral(string $sourceAsString): TemplateLiteralNode { $templateLiteralParser = TemplateLiteralParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $templateLiteralParser->parse($tokens); + return $templateLiteralParser->parse($lexer); } public static function TernaryOperation(string $sourceAsString): TernaryOperationNode @@ -211,25 +211,25 @@ public static function TernaryOperation(string 
$sourceAsString): TernaryOperatio public static function Text(string $sourceAsString): ?TextNode { $textParser = TextParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $textParser->parse($tokens); + return $textParser->parse($lexer); } public static function TypeReference(string $sourceAsString): TypeReferenceNode { $typeReferenceParser = TypeReferenceParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $typeReferenceParser->parse($tokens); + return $typeReferenceParser->parse($lexer); } public static function ValueReference(string $sourceAsString): ValueReferenceNode { $valueReferenceParser = ValueReferenceParser::singleton(); - $tokens = TokenizerFixtures::tokens($sourceAsString); + $lexer = new Lexer($sourceAsString); - return $valueReferenceParser->parse($tokens); + return $valueReferenceParser->parse($lexer); } public static function UnaryOperation(string $sourceAsString): UnaryOperationNode diff --git a/test/Unit/Language/Parser/BooleanLiteral/BooleanLiteralParserTest.php b/test/Unit/Language/Parser/BooleanLiteral/BooleanLiteralParserTest.php index 447c56b..181012b 100644 --- a/test/Unit/Language/Parser/BooleanLiteral/BooleanLiteralParserTest.php +++ b/test/Unit/Language/Parser/BooleanLiteral/BooleanLiteralParserTest.php @@ -23,6 +23,7 @@ namespace PackageFactory\ComponentEngine\Test\Unit\Language\Parser\BooleanLiteral; use PackageFactory\ComponentEngine\Language\AST\Node\BooleanLiteral\BooleanLiteralNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\BooleanLiteral\BooleanLiteralParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -34,7 +35,7 @@ final class BooleanLiteralParserTest extends ParserTestCase public function parsesTrue(): void { $booleanLiteralParser = BooleanLiteralParser::singleton(); - $tokens = 
$this->createTokenIterator('true'); + $lexer = new Lexer('true'); $expectedBooleanLiteralNode = new BooleanLiteralNode( rangeInSource: $this->range([0, 0], [0, 3]), @@ -43,7 +44,7 @@ public function parsesTrue(): void $this->assertEquals( $expectedBooleanLiteralNode, - $booleanLiteralParser->parse($tokens) + $booleanLiteralParser->parse($lexer) ); } @@ -53,7 +54,7 @@ public function parsesTrue(): void public function parsesFalse(): void { $booleanLiteralParser = BooleanLiteralParser::singleton(); - $tokens = $this->createTokenIterator('false'); + $lexer = new Lexer('false'); $expectedBooleanLiteralNode = new BooleanLiteralNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -62,7 +63,7 @@ public function parsesFalse(): void $this->assertEquals( $expectedBooleanLiteralNode, - $booleanLiteralParser->parse($tokens) + $booleanLiteralParser->parse($lexer) ); } } diff --git a/test/Unit/Language/Parser/ComponentDeclaration/ComponentDeclarationParserTest.php b/test/Unit/Language/Parser/ComponentDeclaration/ComponentDeclarationParserTest.php index a724ce9..aab0cfb 100644 --- a/test/Unit/Language/Parser/ComponentDeclaration/ComponentDeclarationParserTest.php +++ b/test/Unit/Language/Parser/ComponentDeclaration/ComponentDeclarationParserTest.php @@ -45,6 +45,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeReferenceNode; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\ComponentDeclaration\ComponentDeclarationParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -56,7 +57,7 @@ final class ComponentDeclarationParserTest extends ParserTestCase public function parsesComponentDeclarationWithNoProps(): void { $componentDeclarationParser = ComponentDeclarationParser::singleton(); - $tokens = 
$this->createTokenIterator('component Foo { return "bar" }'); + $lexer = new Lexer('component Foo { return "bar" }'); $expectedComponentDeclarationNode = new ComponentDeclarationNode( rangeInSource: $this->range([0, 0], [0, 29]), @@ -66,9 +67,9 @@ public function parsesComponentDeclarationWithNoProps(): void ), props: new PropertyDeclarationNodes(), return: new ExpressionNode( - rangeInSource: $this->range([0, 24], [0, 26]), + rangeInSource: $this->range([0, 23], [0, 27]), root: new StringLiteralNode( - rangeInSource: $this->range([0, 24], [0, 26]), + rangeInSource: $this->range([0, 23], [0, 27]), value: 'bar' ) ) @@ -76,7 +77,7 @@ public function parsesComponentDeclarationWithNoProps(): void $this->assertEquals( $expectedComponentDeclarationNode, - $componentDeclarationParser->parse($tokens) + $componentDeclarationParser->parse($lexer) ); } @@ -86,7 +87,7 @@ public function parsesComponentDeclarationWithNoProps(): void public function parsesComponentDeclarationWithOneProp(): void { $componentDeclarationParser = ComponentDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('component Foo { bar: string return bar }'); + $lexer = new Lexer('component Foo { bar: string return bar }'); $expectedComponentDeclarationNode = new ComponentDeclarationNode( rangeInSource: $this->range([0, 0], [0, 39]), @@ -125,7 +126,7 @@ public function parsesComponentDeclarationWithOneProp(): void $this->assertEquals( $expectedComponentDeclarationNode, - $componentDeclarationParser->parse($tokens) + $componentDeclarationParser->parse($lexer) ); } @@ -145,7 +146,7 @@ public function parsesComponentDeclarationWithMultiplePropsAndComplexReturnState return {children} } AFX; - $tokens = $this->createTokenIterator($componentAsString); + $lexer = new Lexer($componentAsString); $expectedComponentDeclarationNode = new ComponentDeclarationNode( rangeInSource: $this->range([0, 0], [7, 0]), @@ -299,7 +300,7 @@ public function 
parsesComponentDeclarationWithMultiplePropsAndComplexReturnState $this->assertEquals( $expectedComponentDeclarationNode, - $componentDeclarationParser->parse($tokens) + $componentDeclarationParser->parse($lexer) ); } } diff --git a/test/Unit/Language/Parser/EnumDeclaration/EnumDeclarationParserTest.php b/test/Unit/Language/Parser/EnumDeclaration/EnumDeclarationParserTest.php index 7eedc1d..5dc39bc 100644 --- a/test/Unit/Language/Parser/EnumDeclaration/EnumDeclarationParserTest.php +++ b/test/Unit/Language/Parser/EnumDeclaration/EnumDeclarationParserTest.php @@ -33,6 +33,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerFormat; use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\EnumDeclaration\EnumDeclarationParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -44,7 +45,7 @@ final class EnumDeclarationParserTest extends ParserTestCase public function parsesEnumDeclarationWithOneValuelessMember(): void { $enumDeclarationParser = EnumDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('enum Foo { BAR }'); + $lexer = new Lexer('enum Foo { BAR }'); $expectedEnumDeclarationNode = new EnumDeclarationNode( rangeInSource: $this->range([0, 0], [0, 15]), @@ -66,7 +67,7 @@ public function parsesEnumDeclarationWithOneValuelessMember(): void $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } @@ -76,7 +77,7 @@ public function parsesEnumDeclarationWithOneValuelessMember(): void public function parsesEnumDeclarationWithThreeValuelessMembers(): void { $enumDeclarationParser = EnumDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('enum Foo { BAR BAZ QUX }'); + $lexer = 
new Lexer('enum Foo { BAR BAZ QUX }'); $expectedEnumDeclarationNode = new EnumDeclarationNode( rangeInSource: $this->range([0, 0], [0, 23]), @@ -114,7 +115,7 @@ public function parsesEnumDeclarationWithThreeValuelessMembers(): void $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } @@ -124,7 +125,7 @@ public function parsesEnumDeclarationWithThreeValuelessMembers(): void public function parsesEnumDeclarationWithOneStringValueMember(): void { $enumDeclarationParser = EnumDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('enum Foo { BAR("BAR") }'); + $lexer = new Lexer('enum Foo { BAR("BAR") }'); $expectedEnumDeclarationNode = new EnumDeclarationNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ -142,7 +143,7 @@ public function parsesEnumDeclarationWithOneStringValueMember(): void value: new EnumMemberValueNode( rangeInSource: $this->range([0, 14], [0, 20]), value: new StringLiteralNode( - rangeInSource: $this->range([0, 16], [0, 18]), + rangeInSource: $this->range([0, 15], [0, 19]), value: 'BAR' ) ) @@ -152,7 +153,7 @@ public function parsesEnumDeclarationWithOneStringValueMember(): void $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } @@ -173,7 +174,7 @@ enum Weekday { SUNDAY("sun") } AFX; - $tokens = $this->createTokenIterator($enumAsString); + $lexer = new Lexer($enumAsString); $expectedEnumDeclarationNode = new EnumDeclarationNode( rangeInSource: $this->range([0, 0], [8, 0]), @@ -191,7 +192,7 @@ enum Weekday { value: new EnumMemberValueNode( rangeInSource: $this->range([1, 10], [1, 16]), value: new StringLiteralNode( - rangeInSource: $this->range([1, 12], [1, 14]), + rangeInSource: $this->range([1, 11], [1, 15]), value: 'mon' ) ) @@ -205,7 +206,7 @@ enum Weekday { value: new EnumMemberValueNode( rangeInSource: $this->range([2, 11], [2, 17]), value: new 
StringLiteralNode( - rangeInSource: $this->range([2, 13], [2, 15]), + rangeInSource: $this->range([2, 12], [2, 16]), value: 'tue' ) ) @@ -219,7 +220,7 @@ enum Weekday { value: new EnumMemberValueNode( rangeInSource: $this->range([3, 13], [3, 19]), value: new StringLiteralNode( - rangeInSource: $this->range([3, 15], [3, 17]), + rangeInSource: $this->range([3, 14], [3, 18]), value: 'wed' ) ) @@ -233,7 +234,7 @@ enum Weekday { value: new EnumMemberValueNode( rangeInSource: $this->range([4, 12], [4, 18]), value: new StringLiteralNode( - rangeInSource: $this->range([4, 14], [4, 16]), + rangeInSource: $this->range([4, 13], [4, 17]), value: 'thu' ) ) @@ -247,7 +248,7 @@ enum Weekday { value: new EnumMemberValueNode( rangeInSource: $this->range([5, 10], [5, 16]), value: new StringLiteralNode( - rangeInSource: $this->range([5, 12], [5, 14]), + rangeInSource: $this->range([5, 11], [5, 15]), value: 'fri' ) ) @@ -261,7 +262,7 @@ enum Weekday { value: new EnumMemberValueNode( rangeInSource: $this->range([6, 12], [6, 18]), value: new StringLiteralNode( - rangeInSource: $this->range([6, 14], [6, 16]), + rangeInSource: $this->range([6, 13], [6, 17]), value: 'sat' ) ) @@ -275,7 +276,7 @@ enum Weekday { value: new EnumMemberValueNode( rangeInSource: $this->range([7, 10], [7, 16]), value: new StringLiteralNode( - rangeInSource: $this->range([7, 12], [7, 14]), + rangeInSource: $this->range([7, 11], [7, 15]), value: 'sun' ) ) @@ -285,7 +286,7 @@ enum Weekday { $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } @@ -295,7 +296,7 @@ enum Weekday { public function parsesEnumDeclarationWithOneBinaryIntegerValueMember(): void { $enumDeclarationParser = EnumDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('enum Foo { BAR(0b101) }'); + $lexer = new Lexer('enum Foo { BAR(0b101) }'); $expectedEnumDeclarationNode = new EnumDeclarationNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ 
-324,7 +325,7 @@ public function parsesEnumDeclarationWithOneBinaryIntegerValueMember(): void $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } @@ -334,7 +335,7 @@ public function parsesEnumDeclarationWithOneBinaryIntegerValueMember(): void public function parsesEnumDeclarationWithOneOctalIntegerValueMember(): void { $enumDeclarationParser = EnumDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('enum Foo { BAR(0o644) }'); + $lexer = new Lexer('enum Foo { BAR(0o644) }'); $expectedEnumDeclarationNode = new EnumDeclarationNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ -363,7 +364,7 @@ public function parsesEnumDeclarationWithOneOctalIntegerValueMember(): void $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } @@ -373,7 +374,7 @@ public function parsesEnumDeclarationWithOneOctalIntegerValueMember(): void public function parsesEnumDeclarationWithOneDecimalIntegerValueMember(): void { $enumDeclarationParser = EnumDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('enum Foo { BAR(42) }'); + $lexer = new Lexer('enum Foo { BAR(42) }'); $expectedEnumDeclarationNode = new EnumDeclarationNode( rangeInSource: $this->range([0, 0], [0, 19]), @@ -402,7 +403,7 @@ public function parsesEnumDeclarationWithOneDecimalIntegerValueMember(): void $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } @@ -412,7 +413,7 @@ public function parsesEnumDeclarationWithOneDecimalIntegerValueMember(): void public function parsesEnumDeclarationWithOneHexadecimalIntegerValueMember(): void { $enumDeclarationParser = EnumDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('enum Foo { BAR(0xABC) }'); + $lexer = new Lexer('enum Foo { BAR(0xABC) }'); $expectedEnumDeclarationNode = new 
EnumDeclarationNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ -441,7 +442,7 @@ public function parsesEnumDeclarationWithOneHexadecimalIntegerValueMember(): voi $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } @@ -467,7 +468,7 @@ enum Month { DECEMBER(12) } AFX; - $tokens = $this->createTokenIterator($enumAsString); + $lexer = new Lexer($enumAsString); $expectedEnumDeclarationNode = new EnumDeclarationNode( rangeInSource: $this->range([0, 0], [13, 0]), @@ -661,7 +662,7 @@ enum Month { $this->assertEquals( $expectedEnumDeclarationNode, - $enumDeclarationParser->parse($tokens) + $enumDeclarationParser->parse($lexer) ); } } diff --git a/test/Unit/Language/Parser/Export/ExportParserTest.php b/test/Unit/Language/Parser/Export/ExportParserTest.php index 37a45f9..fd75d3b 100644 --- a/test/Unit/Language/Parser/Export/ExportParserTest.php +++ b/test/Unit/Language/Parser/Export/ExportParserTest.php @@ -47,12 +47,12 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeReferenceNode; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\Export\ExportCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\Export\ExportParser; -use PackageFactory\ComponentEngine\Parser\Source\Path; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; use 
PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; final class ExportParserTest extends ParserTestCase @@ -63,7 +63,7 @@ final class ExportParserTest extends ParserTestCase public function parsesComponentExport(): void { $exportParser = ExportParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'export component Foo { return bar }' ); @@ -88,7 +88,7 @@ public function parsesComponentExport(): void $this->assertEquals( $expectedExportNode, - $exportParser->parse($tokens) + $exportParser->parse($lexer) ); } @@ -98,7 +98,7 @@ public function parsesComponentExport(): void public function parsesEnumExport(): void { $exportParser = ExportParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'export enum Foo { BAR }' ); @@ -125,7 +125,7 @@ public function parsesEnumExport(): void $this->assertEquals( $expectedExportNode, - $exportParser->parse($tokens) + $exportParser->parse($lexer) ); } @@ -135,7 +135,7 @@ public function parsesEnumExport(): void public function parsesStructExport(): void { $exportParser = ExportParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'export struct Foo { bar: baz }' ); @@ -172,7 +172,7 @@ public function parsesStructExport(): void $this->assertEquals( $expectedExportNode, - $exportParser->parse($tokens) + $exportParser->parse($lexer) ); } @@ -184,22 +184,20 @@ public function throwsIfExportIsNoDeclaration(): void $this->assertThrowsParserException( function () { $exportParser = ExportParser::singleton(); - $tokens = $this->createTokenIterator('export null'); + $lexer = new Lexer('export null'); - $exportParser->parse($tokens); + $exportParser->parse($lexer); }, - ExportCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from( - TokenType::KEYWORD_COMPONENT, - TokenType::KEYWORD_ENUM, - TokenType::KEYWORD_STRUCT + ExportCouldNotBeParsed::becauseOfLexerException( + cause: 
LexerException::becauseOfUnexpectedCharacterSequence( + expectedTokenTypes: TokenTypes::from( + TokenType::KEYWORD_COMPONENT, + TokenType::KEYWORD_ENUM, + TokenType::KEYWORD_STRUCT + ), + affectedRangeInSource: $this->range([0, 7], [0, 7]), + actualCharacterSequence: 'n' ), - actualToken: new Token( - type: TokenType::KEYWORD_NULL, - value: 'null', - boundaries: $this->range([0, 7], [0, 10]), - sourcePath: Path::createMemory() - ) ) ); } diff --git a/test/Unit/Language/Parser/Expression/ExpressionParserTest.php b/test/Unit/Language/Parser/Expression/ExpressionParserTest.php index a7bfb49..bf306e9 100644 --- a/test/Unit/Language/Parser/Expression/ExpressionParserTest.php +++ b/test/Unit/Language/Parser/Expression/ExpressionParserTest.php @@ -22,7 +22,6 @@ namespace PackageFactory\ComponentEngine\Test\Unit\Language\Parser\Expression; -use ArrayIterator; use PackageFactory\ComponentEngine\Domain\AttributeName\AttributeName; use PackageFactory\ComponentEngine\Domain\PropertyName\PropertyName; use PackageFactory\ComponentEngine\Domain\TagName\TagName; @@ -49,6 +48,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Tag\TagNameNode; use PackageFactory\ComponentEngine\Language\AST\Node\Tag\TagNode; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralExpressionSegmentNode; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralLine; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralLines; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralNode; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralSegments; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralStringSegmentNode; @@ -57,8 +58,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\UnaryOperation\UnaryOperationNode; use PackageFactory\ComponentEngine\Language\AST\Node\UnaryOperation\UnaryOperator; use 
PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; final class ExpressionParserTest extends ParserTestCase @@ -69,7 +70,7 @@ final class ExpressionParserTest extends ParserTestCase public function parsesMandatoryAccessWithOneLevel(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a.b'); + $lexer = new Lexer('a.b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 2]), @@ -92,7 +93,7 @@ public function parsesMandatoryAccessWithOneLevel(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -102,7 +103,7 @@ public function parsesMandatoryAccessWithOneLevel(): void public function parsesMandatoryAccessWithMultipleLevels(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a.b.c.d.e'); + $lexer = new Lexer('a.b.c.d.e'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 8]), @@ -158,7 +159,7 @@ public function parsesMandatoryAccessWithMultipleLevels(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -168,7 +169,7 @@ public function parsesMandatoryAccessWithMultipleLevels(): void public function parsesOptionalAccessWithOneLevel(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a?.b'); + $lexer = new Lexer('a?.b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 3]), @@ -191,7 +192,7 @@ public function parsesOptionalAccessWithOneLevel(): void $this->assertEquals( 
$expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -201,7 +202,7 @@ public function parsesOptionalAccessWithOneLevel(): void public function parsesOptionalAccessWithMultipleLevels(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a?.b?.c?.d?.e'); + $lexer = new Lexer('a?.b?.c?.d?.e'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 12]), @@ -257,7 +258,7 @@ public function parsesOptionalAccessWithMultipleLevels(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -267,7 +268,7 @@ public function parsesOptionalAccessWithMultipleLevels(): void public function parsesMixedAccessChainStartingWithMandatoryAccess(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a.b?.c'); + $lexer = new Lexer('a.b?.c'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 5]), @@ -301,7 +302,7 @@ public function parsesMixedAccessChainStartingWithMandatoryAccess(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -311,7 +312,7 @@ public function parsesMixedAccessChainStartingWithMandatoryAccess(): void public function parsesMixedAccessChainStartingWithOptionalAccess(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a?.b.c'); + $lexer = new Lexer('a?.b.c'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 5]), @@ -345,7 +346,7 @@ public function parsesMixedAccessChainStartingWithOptionalAccess(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -355,7 +356,7 @@ public function parsesMixedAccessChainStartingWithOptionalAccess(): void public function 
parsesMandatoryAccessWithBracketedEpxressionAsParent(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('(a ? b : c).d'); + $lexer = new Lexer('(a ? b : c).d'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 12]), @@ -397,7 +398,7 @@ public function parsesMandatoryAccessWithBracketedEpxressionAsParent(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -407,7 +408,7 @@ public function parsesMandatoryAccessWithBracketedEpxressionAsParent(): void public function parsesOptionalAccessWithBracketedEpxressionAsParent(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('(a ? b : c)?.d'); + $lexer = new Lexer('(a ? b : c)?.d'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 13]), @@ -449,7 +450,7 @@ public function parsesOptionalAccessWithBracketedEpxressionAsParent(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -459,7 +460,7 @@ public function parsesOptionalAccessWithBracketedEpxressionAsParent(): void public function parsesBinaryOperationAnd(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a && b'); + $lexer = new Lexer('a && b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 5]), @@ -485,7 +486,7 @@ public function parsesBinaryOperationAnd(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -495,7 +496,7 @@ public function parsesBinaryOperationAnd(): void public function parsesBinaryOperationOr(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a || b'); + $lexer = new Lexer('a || b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: 
$this->range([0, 0], [0, 5]), @@ -521,7 +522,7 @@ public function parsesBinaryOperationOr(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -531,7 +532,7 @@ public function parsesBinaryOperationOr(): void public function parsesBinaryOperationEquals(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a === b'); + $lexer = new Lexer('a === b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 6]), @@ -557,7 +558,7 @@ public function parsesBinaryOperationEquals(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -567,7 +568,7 @@ public function parsesBinaryOperationEquals(): void public function parsesBinaryOperationNotEquals(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a !== b'); + $lexer = new Lexer('a !== b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 6]), @@ -593,7 +594,7 @@ public function parsesBinaryOperationNotEquals(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -603,7 +604,7 @@ public function parsesBinaryOperationNotEquals(): void public function parsesBinaryOperationGreaterThan(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a > b'); + $lexer = new Lexer('a > b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -629,7 +630,7 @@ public function parsesBinaryOperationGreaterThan(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -639,7 +640,7 @@ public function parsesBinaryOperationGreaterThan(): void public function parsesBinaryOperationGreaterThanOrEqual(): void { 
$expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a >= b'); + $lexer = new Lexer('a >= b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 5]), @@ -665,7 +666,7 @@ public function parsesBinaryOperationGreaterThanOrEqual(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -675,7 +676,7 @@ public function parsesBinaryOperationGreaterThanOrEqual(): void public function parsesBinaryOperationLessThan(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a < b'); + $lexer = new Lexer('a < b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -701,7 +702,7 @@ public function parsesBinaryOperationLessThan(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -711,7 +712,7 @@ public function parsesBinaryOperationLessThan(): void public function parsesBinaryOperationLessThanOrEqual(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a <= b'); + $lexer = new Lexer('a <= b'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 5]), @@ -737,7 +738,7 @@ public function parsesBinaryOperationLessThanOrEqual(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -747,7 +748,7 @@ public function parsesBinaryOperationLessThanOrEqual(): void public function parsesBinaryOperationInBrackets(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('(a <= b)'); + $lexer = new Lexer('(a <= b)'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 7]), @@ -773,7 +774,7 @@ public function parsesBinaryOperationInBrackets(): void $this->assertEquals( 
$expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -783,7 +784,7 @@ public function parsesBinaryOperationInBrackets(): void public function parsesBinaryOperationInMultipleBrackets(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('((((a <= b))))'); + $lexer = new Lexer('((((a <= b))))'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 13]), @@ -809,7 +810,7 @@ public function parsesBinaryOperationInMultipleBrackets(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -819,7 +820,7 @@ public function parsesBinaryOperationInMultipleBrackets(): void public function parsesBooleanLiteralTrue(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('true'); + $lexer = new Lexer('true'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 3]), @@ -831,7 +832,7 @@ public function parsesBooleanLiteralTrue(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -841,7 +842,7 @@ public function parsesBooleanLiteralTrue(): void public function parsesBooleanLiteralFalse(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('false'); + $lexer = new Lexer('false'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -853,7 +854,7 @@ public function parsesBooleanLiteralFalse(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -863,7 +864,7 @@ public function parsesBooleanLiteralFalse(): void public function parsesBinaryIntegerLiteral(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('0b1001'); + $lexer = new Lexer('0b1001'); 
$expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 5]), @@ -876,7 +877,7 @@ public function parsesBinaryIntegerLiteral(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -886,7 +887,7 @@ public function parsesBinaryIntegerLiteral(): void public function parsesOctalIntegerLiteral(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('0o755'); + $lexer = new Lexer('0o755'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -899,7 +900,7 @@ public function parsesOctalIntegerLiteral(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -909,7 +910,7 @@ public function parsesOctalIntegerLiteral(): void public function parsesDecimalIntegerLiteral(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('42'); + $lexer = new Lexer('42'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 1]), @@ -922,7 +923,7 @@ public function parsesDecimalIntegerLiteral(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -932,7 +933,7 @@ public function parsesDecimalIntegerLiteral(): void public function parsesHexadecimalIntegerLiteral(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('0xABC'); + $lexer = new Lexer('0xABC'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -945,7 +946,7 @@ public function parsesHexadecimalIntegerLiteral(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -963,7 +964,7 @@ public function parsesMatch(): void default -> "N/A" } AFX; - $tokens = 
$this->createTokenIterator($matchAsString); + $lexer = new Lexer($matchAsString); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [5, 0]), @@ -1077,12 +1078,12 @@ public function parsesMatch(): void ) ), new MatchArmNode( - rangeInSource: $this->range([4, 4], [4, 18]), + rangeInSource: $this->range([4, 4], [4, 19]), left: null, right: new ExpressionNode( - rangeInSource: $this->range([4, 16], [4, 18]), + rangeInSource: $this->range([4, 15], [4, 19]), root: new StringLiteralNode( - rangeInSource: $this->range([4, 16], [4, 18]), + rangeInSource: $this->range([4, 15], [4, 19]), value: 'N/A' ) ) @@ -1093,7 +1094,7 @@ public function parsesMatch(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1103,7 +1104,7 @@ public function parsesMatch(): void public function parsesNullLiteral(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('null'); + $lexer = new Lexer('null'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 3]), @@ -1114,7 +1115,7 @@ public function parsesNullLiteral(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1124,19 +1125,19 @@ public function parsesNullLiteral(): void public function parsesStringLiteral(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('"Hello World"'); + $lexer = new Lexer('"Hello World"'); $expectedExpressioNode = new ExpressionNode( - rangeInSource: $this->range([0, 1], [0, 11]), + rangeInSource: $this->range([0, 0], [0, 12]), root: new StringLiteralNode( - rangeInSource: $this->range([0, 1], [0, 11]), + rangeInSource: $this->range([0, 0], [0, 12]), value: 'Hello World' ) ); $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1146,7 +1147,7 @@ 
public function parsesStringLiteral(): void public function parsesTag(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('Bar!'); + $lexer = new Lexer('Bar!'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ -1158,13 +1159,13 @@ public function parsesTag(): void ), attributes: new AttributeNodes( new AttributeNode( - rangeInSource: $this->range([0, 3], [0, 12]), + rangeInSource: $this->range([0, 3], [0, 13]), name: new AttributeNameNode( rangeInSource: $this->range([0, 3], [0, 6]), value: AttributeName::from('href') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 9], [0, 12]), + rangeInSource: $this->range([0, 8], [0, 13]), value: '#foo' ) ) @@ -1181,7 +1182,7 @@ public function parsesTag(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1191,38 +1192,48 @@ public function parsesTag(): void public function parsesTemplateLiteral(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('`Hello ${friend}!`'); + $lexer = new Lexer(<<range([0, 0], [0, 17]), + rangeInSource: $this->range([0, 0], [2, 2]), root: new TemplateLiteralNode( - rangeInSource: $this->range([0, 0], [0, 17]), - segments: new TemplateLiteralSegments( - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 1], [0, 6]), - value: 'Hello ' - ), - new TemplateLiteralExpressionSegmentNode( - rangeInSource: $this->range([0, 7], [0, 15]), - expression: new ExpressionNode( - rangeInSource: $this->range([0, 9], [0, 14]), - root: new ValueReferenceNode( - rangeInSource: $this->range([0, 9], [0, 14]), - name: VariableName::from('friend') - ) + rangeInSource: $this->range([0, 0], [2, 2]), + indentation: 0, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + 
rangeInSource: $this->range([1, 0], [1, 5]), + value: 'Hello ' + ), + new TemplateLiteralExpressionSegmentNode( + rangeInSource: $this->range([1, 6], [1, 13]), + expression: new ExpressionNode( + rangeInSource: $this->range([1, 7], [1, 12]), + root: new ValueReferenceNode( + rangeInSource: $this->range([1, 7], [1, 12]), + name: VariableName::from('friend') + ) + ) + ), + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([1, 14], [1, 14]), + value: '!' + ), ) - ), - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 16], [0, 16]), - value: '!' - ), + ) ) ) ); $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1232,7 +1243,7 @@ public function parsesTemplateLiteral(): void public function parsesTernaryOperation(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a ? b : c'); + $lexer = new Lexer('a ? b : c'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 8]), @@ -1263,7 +1274,7 @@ public function parsesTernaryOperation(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1273,7 +1284,7 @@ public function parsesTernaryOperation(): void public function parsesNestedBracketedTernaryOperation(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('(a ? b : c) ? (d ? e : f) : (g ? h : i)'); + $lexer = new Lexer('(a ? b : c) ? (d ? e : f) : (g ? 
h : i)'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 38]), @@ -1361,7 +1372,7 @@ public function parsesNestedBracketedTernaryOperation(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1371,7 +1382,7 @@ public function parsesNestedBracketedTernaryOperation(): void public function parsesNestedUnbracketedTernaryOperation(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('a < b ? "yes" : (foo ? "maybe" : "no")'); + $lexer = new Lexer('a < b ? "yes" : (foo ? "maybe" : "no")'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 37]), @@ -1398,9 +1409,9 @@ public function parsesNestedUnbracketedTernaryOperation(): void ) ), trueBranch: new ExpressionNode( - rangeInSource: $this->range([0, 9], [0, 11]), + rangeInSource: $this->range([0, 8], [0, 12]), root: new StringLiteralNode( - rangeInSource: $this->range([0, 9], [0, 11]), + rangeInSource: $this->range([0, 8], [0, 12]), value: 'yes' ) ), @@ -1415,16 +1426,16 @@ public function parsesNestedUnbracketedTernaryOperation(): void ), ), trueBranch: new ExpressionNode( - rangeInSource: $this->range([0, 24], [0, 28]), + rangeInSource: $this->range([0, 23], [0, 29]), root: new StringLiteralNode( - rangeInSource: $this->range([0, 24], [0, 28]), + rangeInSource: $this->range([0, 23], [0, 29]), value: 'maybe' ) ), falseBranch: new ExpressionNode( - rangeInSource: $this->range([0, 34], [0, 35]), + rangeInSource: $this->range([0, 33], [0, 36]), root: new StringLiteralNode( - rangeInSource: $this->range([0, 34], [0, 35]), + rangeInSource: $this->range([0, 33], [0, 36]), value: 'no' ) ) @@ -1435,7 +1446,7 @@ public function parsesNestedUnbracketedTernaryOperation(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1445,12 +1456,12 @@ public function 
parsesNestedUnbracketedTernaryOperation(): void public function parsesTernaryOperationWithComplexUnbracketedCondition(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( '1 < 2 === a || 5 > b || c === true && false ? "a" : "foo"' ); $expectedExpressioNode = new ExpressionNode( - rangeInSource: $this->range([0, 0], [0, 55]), + rangeInSource: $this->range([0, 0], [0, 56]), root: new TernaryOperationNode( condition: new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 42]), @@ -1561,16 +1572,16 @@ public function parsesTernaryOperationWithComplexUnbracketedCondition(): void ) ), trueBranch: new ExpressionNode( - rangeInSource: $this->range([0, 47], [0, 47]), + rangeInSource: $this->range([0, 46], [0, 48]), root: new StringLiteralNode( - rangeInSource: $this->range([0, 47], [0, 47]), + rangeInSource: $this->range([0, 46], [0, 48]), value: 'a' ) ), falseBranch: new ExpressionNode( - rangeInSource: $this->range([0, 53], [0, 55]), + rangeInSource: $this->range([0, 52], [0, 56]), root: new StringLiteralNode( - rangeInSource: $this->range([0, 53], [0, 55]), + rangeInSource: $this->range([0, 52], [0, 56]), value: 'foo' ) ) @@ -1579,7 +1590,7 @@ public function parsesTernaryOperationWithComplexUnbracketedCondition(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1589,7 +1600,7 @@ public function parsesTernaryOperationWithComplexUnbracketedCondition(): void public function parsesTernaryOperationWithComplexParentheses(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('(((foo)) === ((null))) ? 1 : (((0)))'); + $lexer = new Lexer('(((foo)) === ((null))) ? 
1 : (((0)))'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 35]), @@ -1635,7 +1646,7 @@ public function parsesTernaryOperationWithComplexParentheses(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1645,7 +1656,7 @@ public function parsesTernaryOperationWithComplexParentheses(): void public function parsesUnaryOperation(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('!a'); + $lexer = new Lexer('!a'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 1]), @@ -1664,7 +1675,7 @@ public function parsesUnaryOperation(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1674,7 +1685,7 @@ public function parsesUnaryOperation(): void public function parsesDoubleUnaryOperation(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('!!a'); + $lexer = new Lexer('!!a'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 2]), @@ -1700,7 +1711,7 @@ public function parsesDoubleUnaryOperation(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1710,7 +1721,7 @@ public function parsesDoubleUnaryOperation(): void public function parsesTripleUnaryOperation(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('!!!a'); + $lexer = new Lexer('!!!a'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 3]), @@ -1743,7 +1754,7 @@ public function parsesTripleUnaryOperation(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1753,7 +1764,7 @@ public function parsesTripleUnaryOperation(): void public function 
parsesUnaryOperationWithBracketedExpressionAsOperand(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('!(a > b)'); + $lexer = new Lexer('!(a > b)'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 7]), @@ -1786,7 +1797,7 @@ public function parsesUnaryOperationWithBracketedExpressionAsOperand(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1796,7 +1807,7 @@ public function parsesUnaryOperationWithBracketedExpressionAsOperand(): void public function parsesValueReference(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('foo'); + $lexer = new Lexer('foo'); $expectedExpressioNode = new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 2]), @@ -1808,7 +1819,7 @@ public function parsesValueReference(): void $this->assertEquals( $expectedExpressioNode, - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } @@ -1819,7 +1830,7 @@ public function parsesMultipleParenthesesAroundValureReferenceCorrecly(): void { $expressionParser = new ExpressionParser(); - $tokens = $this->createTokenIterator('(foo)'); + $lexer = new Lexer('(foo)'); $this->assertEquals( new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -1828,10 +1839,10 @@ public function parsesMultipleParenthesesAroundValureReferenceCorrecly(): void name: VariableName::from('foo') ) ), - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); - $tokens = $this->createTokenIterator('((foo))'); + $lexer = new Lexer('((foo))'); $this->assertEquals( new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 6]), @@ -1840,10 +1851,10 @@ public function parsesMultipleParenthesesAroundValureReferenceCorrecly(): void name: VariableName::from('foo') ) ), - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); - $tokens = 
$this->createTokenIterator('(((foo)))'); + $lexer = new Lexer('(((foo)))'); $this->assertEquals( new ExpressionNode( rangeInSource: $this->range([0, 0], [0, 8]), @@ -1852,7 +1863,7 @@ public function parsesMultipleParenthesesAroundValureReferenceCorrecly(): void name: VariableName::from('foo') ) ), - $expressionParser->parse($tokens) + $expressionParser->parse($lexer) ); } } diff --git a/test/Unit/Language/Parser/Import/ImportParserTest.php b/test/Unit/Language/Parser/Import/ImportParserTest.php index bf7779e..c80aa65 100644 --- a/test/Unit/Language/Parser/Import/ImportParserTest.php +++ b/test/Unit/Language/Parser/Import/ImportParserTest.php @@ -28,6 +28,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Import\ImportNode; use PackageFactory\ComponentEngine\Language\AST\Node\Import\InvalidImportedNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\Import\ImportCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\Import\ImportParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -40,14 +41,14 @@ final class ImportParserTest extends ParserTestCase public function parsesImportWithOneName(): void { $importParser = ImportParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'from "/some/where/in/the/filesystem" import { Foo }' ); $expectedImportNode = new ImportNode( rangeInSource: $this->range([0, 0], [0, 50]), path: new StringLiteralNode( - rangeInSource: $this->range([0, 6], [0, 34]), + rangeInSource: $this->range([0, 5], [0, 35]), value: '/some/where/in/the/filesystem' ), names: new ImportedNameNodes( @@ -60,7 +61,7 @@ public function parsesImportWithOneName(): void $this->assertEquals( $expectedImportNode, - $importParser->parse($tokens) + $importParser->parse($lexer) ); } @@ -70,14 +71,14 @@ public function 
parsesImportWithOneName(): void public function parsesImportWithMultipleNames(): void { $importParser = ImportParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'from "./some/other.component" import { Foo, Bar, Baz }' ); $expectedImportNode = new ImportNode( rangeInSource: $this->range([0, 0], [0, 53]), path: new StringLiteralNode( - rangeInSource: $this->range([0, 6], [0, 27]), + rangeInSource: $this->range([0, 5], [0, 28]), value: './some/other.component' ), names: new ImportedNameNodes( @@ -98,7 +99,7 @@ public function parsesImportWithMultipleNames(): void $this->assertEquals( $expectedImportNode, - $importParser->parse($tokens) + $importParser->parse($lexer) ); } @@ -110,15 +111,15 @@ public function throwsIfEmptyImportOccurs(): void $this->assertThrowsParserException( function () { $importParser = ImportParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'from "/some/where" import {}' ); - $importParser->parse($tokens); + $importParser->parse($lexer); }, ImportCouldNotBeParsed::becauseOfInvalidImportedNameNodes( cause: InvalidImportedNameNodes::becauseTheyWereEmpty(), - affectedRangeInSource: $this->range([0, 26], [0, 26]) + affectedRangeInSource: $this->range([0, 26], [0, 27]) ) ); } @@ -131,11 +132,11 @@ public function throwsIfDuplicateImportsOccur(): void $this->assertThrowsParserException( function () { $importParser = ImportParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'from "/some/where" import { Foo, Bar, Baz, Bar, Qux }' ); - $importParser->parse($tokens); + $importParser->parse($lexer); }, ImportCouldNotBeParsed::becauseOfInvalidImportedNameNodes( cause: InvalidImportedNameNodes::becauseTheyContainDuplicates( diff --git a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php index 5831a74..6a6f6f6 100644 --- 
a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php +++ b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php @@ -24,12 +24,12 @@ use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerFormat; use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; -use PackageFactory\ComponentEngine\Parser\Source\Path; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; final class IntegerLiteralParserTest extends ParserTestCase @@ -40,7 +40,7 @@ final class IntegerLiteralParserTest extends ParserTestCase public function parsesBinaryInteger(): void { $integerLiteralParser = IntegerLiteralParser::singleton(); - $tokens = $this->createTokenIterator('0b1010110101'); + $lexer = new Lexer('0b1010110101'); $expectedIntegerLiteralNode = new IntegerLiteralNode( rangeInSource: $this->range([0, 0], [0, 11]), @@ -50,7 +50,7 @@ public function parsesBinaryInteger(): void $this->assertEquals( $expectedIntegerLiteralNode, - $integerLiteralParser->parse($tokens) + $integerLiteralParser->parse($lexer) ); } @@ -60,7 +60,7 @@ public function parsesBinaryInteger(): void public function parsesOctalInteger(): void { $integerLiteralParser = IntegerLiteralParser::singleton(); - $tokens = $this->createTokenIterator('0o755'); + $lexer = new Lexer('0o755'); 
$expectedIntegerLiteralNode = new IntegerLiteralNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -70,7 +70,7 @@ public function parsesOctalInteger(): void $this->assertEquals( $expectedIntegerLiteralNode, - $integerLiteralParser->parse($tokens) + $integerLiteralParser->parse($lexer) ); } @@ -80,7 +80,7 @@ public function parsesOctalInteger(): void public function parsesDecimalInteger(): void { $integerLiteralParser = IntegerLiteralParser::singleton(); - $tokens = $this->createTokenIterator('1234567890'); + $lexer = new Lexer('1234567890'); $expectedIntegerLiteralNode = new IntegerLiteralNode( rangeInSource: $this->range([0, 0], [0, 9]), @@ -90,7 +90,7 @@ public function parsesDecimalInteger(): void $this->assertEquals( $expectedIntegerLiteralNode, - $integerLiteralParser->parse($tokens) + $integerLiteralParser->parse($lexer) ); } @@ -100,7 +100,7 @@ public function parsesDecimalInteger(): void public function parsesHexadecimalInteger(): void { $integerLiteralParser = IntegerLiteralParser::singleton(); - $tokens = $this->createTokenIterator('0x123456789ABCDEF'); + $lexer = new Lexer('0x123456789ABCDEF'); $expectedIntegerLiteralNode = new IntegerLiteralNode( rangeInSource: $this->range([0, 0], [0, 16]), @@ -110,7 +110,7 @@ public function parsesHexadecimalInteger(): void $this->assertEquals( $expectedIntegerLiteralNode, - $integerLiteralParser->parse($tokens) + $integerLiteralParser->parse($lexer) ); } @@ -122,11 +122,21 @@ public function throwsIfTokenStreamEndsUnexpectedly(): void $this->assertThrowsParserException( function () { $integerLiteralParser = IntegerLiteralParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); - $integerLiteralParser->parse($tokens); + $integerLiteralParser->parse($lexer); }, - IntegerLiteralCouldNotBeParsed::becauseOfUnexpectedEndOfFile() + IntegerLiteralCouldNotBeParsed::becauseOfLexerException( + cause: LexerException::becauseOfUnexpectedEndOfSource( + expectedTokenTypes: TokenTypes::from( + 
TokenType::INTEGER_HEXADECIMAL, + TokenType::INTEGER_DECIMAL, + TokenType::INTEGER_OCTAL, + TokenType::INTEGER_BINARY + ), + affectedRangeInSource: $this->range([0, 0], [0, 0]) + ) + ) ); } @@ -138,22 +148,20 @@ public function throwsIfUnexpectedTokenIsEncountered(): void $this->assertThrowsParserException( function () { $integerLiteralParser = IntegerLiteralParser::singleton(); - $tokens = $this->createTokenIterator('foo1234'); + $lexer = new Lexer('foo1234'); - $integerLiteralParser->parse($tokens); + $integerLiteralParser->parse($lexer); }, - IntegerLiteralCouldNotBeParsed::becauseOfUnexpectedToken( - expectedTokenTypes: TokenTypes::from( - TokenType::NUMBER_BINARY, - TokenType::NUMBER_OCTAL, - TokenType::NUMBER_DECIMAL, - TokenType::NUMBER_HEXADECIMAL - ), - actualToken: new Token( - type: TokenType::STRING, - value: 'foo1234', - boundaries: $this->range([0, 0], [0, 6]), - sourcePath: Path::createMemory() + IntegerLiteralCouldNotBeParsed::becauseOfLexerException( + cause: LexerException::becauseOfUnexpectedCharacterSequence( + expectedTokenTypes: TokenTypes::from( + TokenType::INTEGER_HEXADECIMAL, + TokenType::INTEGER_DECIMAL, + TokenType::INTEGER_OCTAL, + TokenType::INTEGER_BINARY + ), + affectedRangeInSource: $this->range([0, 0], [0, 0]), + actualCharacterSequence: 'f' ) ) ); diff --git a/test/Unit/Language/Parser/Match/MatchParserTest.php b/test/Unit/Language/Parser/Match/MatchParserTest.php index a3c4393..9cdb949 100644 --- a/test/Unit/Language/Parser/Match/MatchParserTest.php +++ b/test/Unit/Language/Parser/Match/MatchParserTest.php @@ -30,6 +30,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Match\MatchArmNodes; use PackageFactory\ComponentEngine\Language\AST\Node\Match\MatchNode; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\Match\MatchCouldNotBeParsed; use 
PackageFactory\ComponentEngine\Language\Parser\Match\MatchParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -42,7 +43,7 @@ final class MatchParserTest extends ParserTestCase public function parsesMatchWithOneArm(): void { $matchParser = MatchParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'match (a) { b -> c }' ); @@ -80,7 +81,7 @@ public function parsesMatchWithOneArm(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -90,7 +91,7 @@ public function parsesMatchWithOneArm(): void public function parsesMatchWithMultipleArms(): void { $matchParser = MatchParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'match (a) { b -> c d -> e f -> g }' ); @@ -166,7 +167,7 @@ public function parsesMatchWithMultipleArms(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -176,7 +177,7 @@ public function parsesMatchWithMultipleArms(): void public function parsesMatchWithOneSummarizedArm(): void { $matchParser = MatchParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'match (a) { b, c, d -> e }' ); @@ -228,7 +229,7 @@ public function parsesMatchWithOneSummarizedArm(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -238,7 +239,7 @@ public function parsesMatchWithOneSummarizedArm(): void public function parsesMatchWithMultipleSummarizedArms(): void { $matchParser = MatchParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'match (a) { b, c, d -> e f, g, h -> i j, k, l -> m }' ); @@ -356,7 +357,7 @@ public function parsesMatchWithMultipleSummarizedArms(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -366,7 +367,7 @@ public function 
parsesMatchWithMultipleSummarizedArms(): void public function parsesMatchWithOnlyDefaultArm(): void { $matchParser = MatchParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'match (a) { default -> b }' ); @@ -396,7 +397,7 @@ public function parsesMatchWithOnlyDefaultArm(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -406,7 +407,7 @@ public function parsesMatchWithOnlyDefaultArm(): void public function parsesMatchWithOneArmAndDefaultArm(): void { $matchParser = MatchParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'match (a) { b -> c default -> d }' ); @@ -455,7 +456,7 @@ public function parsesMatchWithOneArmAndDefaultArm(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -465,7 +466,7 @@ public function parsesMatchWithOneArmAndDefaultArm(): void public function parsesMatchWithOneSummarizedArmAndDefaultArm(): void { $matchParser = MatchParser::singleton(); - $tokens = $this->createTokenIterator( + $lexer = new Lexer( 'match (a) { b, c, d -> e default -> f }' ); @@ -528,7 +529,7 @@ public function parsesMatchWithOneSummarizedArmAndDefaultArm(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -547,7 +548,7 @@ public function parsesMatchWithMixedArms(): void l -> m } AFX; - $tokens = $this->createTokenIterator($matchAsString); + $lexer = new Lexer($matchAsString); $expectedMatchNode = new MatchNode( rangeInSource: $this->range([0, 0], [6, 0]), @@ -672,7 +673,7 @@ public function parsesMatchWithMixedArms(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -688,7 +689,7 @@ public function parsesNestedMatchAsSubject(): void default -> g } AFX; - $tokens = $this->createTokenIterator($matchAsString); + $lexer = new Lexer($matchAsString); 
$expectedMatchNode = new MatchNode( rangeInSource: $this->range([0, 0], [3, 0]), @@ -787,7 +788,7 @@ public function parsesNestedMatchAsSubject(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -805,7 +806,7 @@ public function parsesNestedMatchAsArmLeft(): void default -> q } AFX; - $tokens = $this->createTokenIterator($matchAsString); + $lexer = new Lexer($matchAsString); $expectedMatchNode = new MatchNode( rangeInSource: $this->range([0, 0], [5, 0]), @@ -995,7 +996,7 @@ public function parsesNestedMatchAsArmLeft(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -1011,7 +1012,7 @@ public function parsesNestedMatchAsArmRight(): void default -> h } AFX; - $tokens = $this->createTokenIterator($matchAsString); + $lexer = new Lexer($matchAsString); $expectedMatchNode = new MatchNode( rangeInSource: $this->range([0, 0], [3, 0]), @@ -1103,7 +1104,7 @@ public function parsesNestedMatchAsArmRight(): void $this->assertEquals( $expectedMatchNode, - $matchParser->parse($tokens) + $matchParser->parse($lexer) ); } @@ -1115,13 +1116,13 @@ public function throwsIfMatchArmsAreEmpty(): void $this->assertThrowsParserException( function () { $matchParser = MatchParser::singleton(); - $tokens = $this->createTokenIterator('match (a) {}'); + $lexer = new Lexer('match (a) {}'); - $matchParser->parse($tokens); + $matchParser->parse($lexer); }, MatchCouldNotBeParsed::becauseOfInvalidMatchArmNodes( cause: InvalidMatchArmNodes::becauseTheyWereEmpty(), - affectedRangeInSource: $this->range([0, 0], [0, 4]) + affectedRangeInSource: $this->range([0, 10], [0, 11]) ) ); } @@ -1143,9 +1144,9 @@ function () { j -> k } AFX; - $tokens = $this->createTokenIterator($matchAsString); + $lexer = new Lexer($matchAsString); - $matchParser->parse($tokens); + $matchParser->parse($lexer); }, MatchCouldNotBeParsed::becauseOfInvalidMatchArmNodes( cause: 
InvalidMatchArmNodes::becauseTheyContainMoreThanOneDefaultMatchArmNode( diff --git a/test/Unit/Language/Parser/Module/ModuleParserTest.php b/test/Unit/Language/Parser/Module/ModuleParserTest.php index a54ae77..0763d0c 100644 --- a/test/Unit/Language/Parser/Module/ModuleParserTest.php +++ b/test/Unit/Language/Parser/Module/ModuleParserTest.php @@ -34,11 +34,10 @@ use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructDeclarationNode; use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructNameNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\LexerException; use PackageFactory\ComponentEngine\Language\Parser\Module\ModuleCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\Module\ModuleParser; -use PackageFactory\ComponentEngine\Parser\Source\Path; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; final class ModuleParserTest extends ParserTestCase @@ -52,7 +51,7 @@ public function parsesModuleWithNoImports(): void $moduleAsString = <<createTokenIterator($moduleAsString); + $lexer = new Lexer($moduleAsString); $expectedModuleNode = new ModuleNode( rangeInSource: $this->range([0, 0], [0, 19]), @@ -72,7 +71,7 @@ public function parsesModuleWithNoImports(): void $this->assertEquals( $expectedModuleNode, - $moduleParser->parse($tokens) + $moduleParser->parse($lexer) ); } @@ -87,7 +86,7 @@ public function parsesModuleWithOneImport(): void export struct Baz {} AFX; - $tokens = $this->createTokenIterator($moduleAsString); + $lexer = new Lexer($moduleAsString); $expectedModuleNode = new ModuleNode( rangeInSource: $this->range([0, 0], [2, 19]), @@ -95,7 +94,7 @@ public function parsesModuleWithOneImport(): void new 
ImportNode( rangeInSource: $this->range([0, 0], [0, 37]), path: new StringLiteralNode( - rangeInSource: $this->range([0, 6], [0, 16]), + rangeInSource: $this->range([0, 5], [0, 17]), value: '/some/where' ), names: new ImportedNameNodes( @@ -125,7 +124,7 @@ public function parsesModuleWithOneImport(): void $this->assertEquals( $expectedModuleNode, - $moduleParser->parse($tokens) + $moduleParser->parse($lexer) ); } @@ -142,7 +141,7 @@ public function parsesModuleWithMultipleImports(): void export struct Corge {} AFX; - $tokens = $this->createTokenIterator($moduleAsString); + $lexer = new Lexer($moduleAsString); $expectedModuleNode = new ModuleNode( rangeInSource: $this->range([0, 0], [4, 21]), @@ -150,7 +149,7 @@ public function parsesModuleWithMultipleImports(): void new ImportNode( rangeInSource: $this->range([0, 0], [0, 37]), path: new StringLiteralNode( - rangeInSource: $this->range([0, 6], [0, 16]), + rangeInSource: $this->range([0, 5], [0, 17]), value: '/some/where' ), names: new ImportedNameNodes( @@ -167,7 +166,7 @@ public function parsesModuleWithMultipleImports(): void new ImportNode( rangeInSource: $this->range([1, 0], [1, 37]), path: new StringLiteralNode( - rangeInSource: $this->range([1, 6], [1, 21]), + rangeInSource: $this->range([1, 5], [1, 22]), value: '/some/where/else' ), names: new ImportedNameNodes( @@ -180,7 +179,7 @@ public function parsesModuleWithMultipleImports(): void new ImportNode( rangeInSource: $this->range([2, 0], [2, 33]), path: new StringLiteralNode( - rangeInSource: $this->range([2, 6], [2, 11]), + rangeInSource: $this->range([2, 5], [2, 12]), value: './here' ), names: new ImportedNameNodes( @@ -211,7 +210,7 @@ public function parsesModuleWithMultipleImports(): void $this->assertEquals( $expectedModuleNode, - $moduleParser->parse($tokens) + $moduleParser->parse($lexer) ); } @@ -240,7 +239,7 @@ public function toleratesCommentsAndSpacesInBetweenStatements(): void # AFX; - $tokens = $this->createTokenIterator($moduleAsString); + 
$lexer = new Lexer($moduleAsString); $expectedModuleNode = new ModuleNode( rangeInSource: $this->range([0, 0], [11, 19]), @@ -248,7 +247,7 @@ public function toleratesCommentsAndSpacesInBetweenStatements(): void new ImportNode( rangeInSource: $this->range([5, 0], [5, 37]), path: new StringLiteralNode( - rangeInSource: $this->range([5, 6], [5, 16]), + rangeInSource: $this->range([5, 5], [5, 17]), value: '/some/where' ), names: new ImportedNameNodes( @@ -278,14 +277,14 @@ public function toleratesCommentsAndSpacesInBetweenStatements(): void $this->assertEquals( $expectedModuleNode, - $moduleParser->parse($tokens) + $moduleParser->parse($lexer) ); } /** * @test */ - public function throwsIfExceedingTokensOccur(): void + public function throwsIfExceedingCharactersOccur(): void { $this->assertThrowsParserException( function () { @@ -297,16 +296,14 @@ function () { export struct Qux {} export struct Quux {} AFX; - $tokens = $this->createTokenIterator($moduleAsString); + $lexer = new Lexer($moduleAsString); - $moduleParser->parse($tokens); + $moduleParser->parse($lexer); }, - ModuleCouldNotBeParsed::becauseOfUnexpectedExceedingToken( - exceedingToken: new Token( - type: TokenType::KEYWORD_EXPORT, - value: 'export', - boundaries: $this->range([4, 0], [4, 5]), - sourcePath: Path::createMemory() + ModuleCouldNotBeParsed::becauseOfLexerException( + cause: LexerException::becauseOfUnexpectedExceedingSource( + affectedRangeInSource: $this->range([4, 0], [4, 0]), + exceedingCharacter: 'e' ) ) ); diff --git a/test/Unit/Language/Parser/NullLiteral/NullLiteralParserTest.php b/test/Unit/Language/Parser/NullLiteral/NullLiteralParserTest.php index 6cc820e..53ffd33 100644 --- a/test/Unit/Language/Parser/NullLiteral/NullLiteralParserTest.php +++ b/test/Unit/Language/Parser/NullLiteral/NullLiteralParserTest.php @@ -23,6 +23,7 @@ namespace PackageFactory\ComponentEngine\Test\Unit\Language\Parser\NullLiteral; use PackageFactory\ComponentEngine\Language\AST\Node\NullLiteral\NullLiteralNode; 
+use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\NullLiteral\NullLiteralParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -34,7 +35,7 @@ final class NullLiteralParserTest extends ParserTestCase public function parsesNull(): void { $nullLiteralParser = NullLiteralParser::singleton(); - $tokens = $this->createTokenIterator('null'); + $lexer = new Lexer('null'); $expectedNullLiteralNode = new NullLiteralNode( rangeInSource: $this->range([0, 0], [0, 3]) @@ -42,7 +43,7 @@ public function parsesNull(): void $this->assertEquals( $expectedNullLiteralNode, - $nullLiteralParser->parse($tokens) + $nullLiteralParser->parse($lexer) ); } } diff --git a/test/Unit/Language/Parser/PropertyDeclaration/PropertyDeclarationParserTest.php b/test/Unit/Language/Parser/PropertyDeclaration/PropertyDeclarationParserTest.php index 720d636..e84b84a 100644 --- a/test/Unit/Language/Parser/PropertyDeclaration/PropertyDeclarationParserTest.php +++ b/test/Unit/Language/Parser/PropertyDeclaration/PropertyDeclarationParserTest.php @@ -29,6 +29,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNode; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\PropertyDeclaration\PropertyDeclarationParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -40,7 +41,7 @@ final class PropertyDeclarationParserTest extends ParserTestCase public function parsesPropertyDeclarationWithSimpleType(): void { $propertyDeclarationParser = PropertyDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('foo: Bar'); + $lexer = new Lexer('foo: Bar'); $expectedPropertyDeclarationNode = new PropertyDeclarationNode( rangeInSource: 
$this->range([0, 0], [0, 7]), @@ -63,7 +64,7 @@ public function parsesPropertyDeclarationWithSimpleType(): void $this->assertEquals( $expectedPropertyDeclarationNode, - $propertyDeclarationParser->parse($tokens) + $propertyDeclarationParser->parse($lexer) ); } @@ -73,7 +74,7 @@ public function parsesPropertyDeclarationWithSimpleType(): void public function parsesPropertyDeclarationWithOptionalType(): void { $propertyDeclarationParser = PropertyDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('foo: ?Bar'); + $lexer = new Lexer('foo: ?Bar'); $expectedPropertyDeclarationNode = new PropertyDeclarationNode( rangeInSource: $this->range([0, 0], [0, 8]), @@ -96,7 +97,7 @@ public function parsesPropertyDeclarationWithOptionalType(): void $this->assertEquals( $expectedPropertyDeclarationNode, - $propertyDeclarationParser->parse($tokens) + $propertyDeclarationParser->parse($lexer) ); } @@ -106,7 +107,7 @@ public function parsesPropertyDeclarationWithOptionalType(): void public function parsesPropertyDeclarationWithArrayType(): void { $propertyDeclarationParser = PropertyDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('foo: Bar[]'); + $lexer = new Lexer('foo: Bar[]'); $expectedPropertyDeclarationNode = new PropertyDeclarationNode( rangeInSource: $this->range([0, 0], [0, 9]), @@ -129,7 +130,7 @@ public function parsesPropertyDeclarationWithArrayType(): void $this->assertEquals( $expectedPropertyDeclarationNode, - $propertyDeclarationParser->parse($tokens) + $propertyDeclarationParser->parse($lexer) ); } @@ -139,7 +140,7 @@ public function parsesPropertyDeclarationWithArrayType(): void public function parsesPropertyDeclarationWithUnionType(): void { $propertyDeclarationParser = PropertyDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('foo: Bar|Baz|Qux'); + $lexer = new Lexer('foo: Bar|Baz|Qux'); $expectedPropertyDeclarationNode = new PropertyDeclarationNode( rangeInSource: $this->range([0, 0], [0, 15]), @@ -170,7 
+171,7 @@ public function parsesPropertyDeclarationWithUnionType(): void $this->assertEquals( $expectedPropertyDeclarationNode, - $propertyDeclarationParser->parse($tokens) + $propertyDeclarationParser->parse($lexer) ); } } diff --git a/test/Unit/Language/Parser/StringLiteral/StringLiteralParserTest.php b/test/Unit/Language/Parser/StringLiteral/StringLiteralParserTest.php index bfee07e..359aad7 100644 --- a/test/Unit/Language/Parser/StringLiteral/StringLiteralParserTest.php +++ b/test/Unit/Language/Parser/StringLiteral/StringLiteralParserTest.php @@ -23,27 +23,47 @@ namespace PackageFactory\ComponentEngine\Test\Unit\Language\Parser\StringLiteral; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; final class StringLiteralParserTest extends ParserTestCase { + /** + * @test + */ + public function parsesEmptyString(): void + { + $stringLiteralParser = StringLiteralParser::singleton(); + $lexer = new Lexer('""'); + + $expectedStringLiteralNode = new StringLiteralNode( + rangeInSource: $this->range([0, 0], [0, 1]), + value: '' + ); + + $this->assertEquals( + $expectedStringLiteralNode, + $stringLiteralParser->parse($lexer) + ); + } + /** * @test */ public function parsesString(): void { $stringLiteralParser = StringLiteralParser::singleton(); - $tokens = $this->createTokenIterator('"Hello World"'); + $lexer = new Lexer('"Hello World"'); $expectedStringLiteralNode = new StringLiteralNode( - rangeInSource: $this->range([0, 1], [0, 11]), + rangeInSource: $this->range([0, 0], [0, 12]), value: 'Hello World' ); $this->assertEquals( $expectedStringLiteralNode, - $stringLiteralParser->parse($tokens) + $stringLiteralParser->parse($lexer) ); } } diff --git 
a/test/Unit/Language/Parser/StructDeclaration/StructDeclarationParserTest.php b/test/Unit/Language/Parser/StructDeclaration/StructDeclarationParserTest.php index 87fff6e..79b3b16 100644 --- a/test/Unit/Language/Parser/StructDeclaration/StructDeclarationParserTest.php +++ b/test/Unit/Language/Parser/StructDeclaration/StructDeclarationParserTest.php @@ -33,6 +33,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNode; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\StructDeclaration\StructDeclarationParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -44,7 +45,7 @@ final class StructDeclarationParserTest extends ParserTestCase public function parsesStructDeclarationWithOneProperty(): void { $structDeclarationParser = StructDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('struct Foo { bar: Baz }'); + $lexer = new Lexer('struct Foo { bar: Baz }'); $expectedStructDeclarationNode = new StructDeclarationNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ -76,7 +77,7 @@ public function parsesStructDeclarationWithOneProperty(): void $this->assertEquals( $expectedStructDeclarationNode, - $structDeclarationParser->parse($tokens) + $structDeclarationParser->parse($lexer) ); } @@ -86,7 +87,7 @@ public function parsesStructDeclarationWithOneProperty(): void public function parsesStructDeclarationWithMultipleProperties(): void { $structDeclarationParser = StructDeclarationParser::singleton(); - $tokens = $this->createTokenIterator('struct Foo { bar: Baz qux: Quux corge: Grault }'); + $lexer = new Lexer('struct Foo { bar: Baz qux: Quux corge: Grault }'); $expectedStructDeclarationNode = new StructDeclarationNode( rangeInSource: $this->range([0, 0], [0, 46]), @@ 
-154,7 +155,7 @@ public function parsesStructDeclarationWithMultipleProperties(): void $this->assertEquals( $expectedStructDeclarationNode, - $structDeclarationParser->parse($tokens) + $structDeclarationParser->parse($lexer) ); } @@ -175,7 +176,7 @@ public function parsesStructDeclarationWithMultiplePropertiesAndSpaceAndComments } AFX; - $tokens = $this->createTokenIterator($structAsString); + $lexer = new Lexer($structAsString); $expectedStructDeclarationNode = new StructDeclarationNode( rangeInSource: $this->range([0, 0], [8, 0]), @@ -225,7 +226,7 @@ public function parsesStructDeclarationWithMultiplePropertiesAndSpaceAndComments $this->assertEquals( $expectedStructDeclarationNode, - $structDeclarationParser->parse($tokens) + $structDeclarationParser->parse($lexer) ); } @@ -242,7 +243,7 @@ public function parsesStructDeclarationWitOptionalArrayAndUnionProperties(): voi title: ?string } AFX; - $tokens = $this->createTokenIterator($structAsString); + $lexer = new Lexer($structAsString); $expectedStructDeclarationNode = new StructDeclarationNode( rangeInSource: $this->range([0, 0], [4, 0]), @@ -318,7 +319,7 @@ public function parsesStructDeclarationWitOptionalArrayAndUnionProperties(): voi $this->assertEquals( $expectedStructDeclarationNode, - $structDeclarationParser->parse($tokens) + $structDeclarationParser->parse($lexer) ); } } diff --git a/test/Unit/Language/Parser/Tag/TagParserTest.php b/test/Unit/Language/Parser/Tag/TagParserTest.php index f43eeff..464dc35 100644 --- a/test/Unit/Language/Parser/Tag/TagParserTest.php +++ b/test/Unit/Language/Parser/Tag/TagParserTest.php @@ -35,6 +35,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Tag\TagNode; use PackageFactory\ComponentEngine\Language\AST\Node\Text\TextNode; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\Tag\TagParser; use 
PackageFactory\ComponentEngine\Language\Parser\Tag\TagCouldNotBeParsed; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -47,7 +48,7 @@ final class TagParserTest extends ParserTestCase public function parsesSelfClosingTagWithoutAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 3]), @@ -62,7 +63,7 @@ public function parsesSelfClosingTagWithoutAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -72,7 +73,7 @@ public function parsesSelfClosingTagWithoutAttributes(): void public function parsesSelfClosingTagWithValuelessAttribute(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer('
'); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 11]), @@ -96,7 +97,7 @@ public function parsesSelfClosingTagWithValuelessAttribute(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -106,7 +107,7 @@ public function parsesSelfClosingTagWithValuelessAttribute(): void public function parsesSelfClosingTagWithMultipleValuelessAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator('
'); + $lexer = new Lexer('
'); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 19]), @@ -146,7 +147,7 @@ public function parsesSelfClosingTagWithMultipleValuelessAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -156,7 +157,7 @@ public function parsesSelfClosingTagWithMultipleValuelessAttributes(): void public function parsesSelfClosingTagWithStringAttribute(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 13]), @@ -166,13 +167,13 @@ public function parsesSelfClosingTagWithStringAttribute(): void ), attributes: new AttributeNodes( new AttributeNode( - rangeInSource: $this->range([0, 3], [0, 10]), + rangeInSource: $this->range([0, 3], [0, 11]), name: new AttributeNameNode( rangeInSource: $this->range([0, 3], [0, 5]), value: AttributeName::from('foo') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 8], [0, 10]), + rangeInSource: $this->range([0, 7], [0, 11]), value: 'bar' ) ) @@ -183,7 +184,7 @@ public function parsesSelfClosingTagWithStringAttribute(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -193,7 +194,7 @@ public function parsesSelfClosingTagWithStringAttribute(): void public function parsesSelfClosingTagWithMultipleStringAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator('
'); + $lexer = new Lexer('
'); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 38]), @@ -203,35 +204,35 @@ public function parsesSelfClosingTagWithMultipleStringAttributes(): void ), attributes: new AttributeNodes( new AttributeNode( - rangeInSource: $this->range([0, 5], [0, 12]), + rangeInSource: $this->range([0, 5], [0, 13]), name: new AttributeNameNode( rangeInSource: $this->range([0, 5], [0, 7]), value: AttributeName::from('foo') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 10], [0, 12]), + rangeInSource: $this->range([0, 9], [0, 13]), value: 'bar' ) ), new AttributeNode( - rangeInSource: $this->range([0, 15], [0, 22]), + rangeInSource: $this->range([0, 15], [0, 23]), name: new AttributeNameNode( rangeInSource: $this->range([0, 15], [0, 17]), value: AttributeName::from('baz') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 20], [0, 22]), + rangeInSource: $this->range([0, 19], [0, 23]), value: 'qux' ) ), new AttributeNode( - rangeInSource: $this->range([0, 25], [0, 35]), + rangeInSource: $this->range([0, 25], [0, 36]), name: new AttributeNameNode( rangeInSource: $this->range([0, 25], [0, 28]), value: AttributeName::from('quux') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 31], [0, 35]), + rangeInSource: $this->range([0, 30], [0, 36]), value: 'corge' ) ) @@ -242,7 +243,7 @@ public function parsesSelfClosingTagWithMultipleStringAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -252,7 +253,7 @@ public function parsesSelfClosingTagWithMultipleStringAttributes(): void public function parsesSelfClosingTagWithExpressionAttribute(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 13]), @@ -282,7 +283,7 @@ public function parsesSelfClosingTagWithExpressionAttribute(): void $this->assertEquals( 
$expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -292,7 +293,7 @@ public function parsesSelfClosingTagWithExpressionAttribute(): void public function parsesSelfClosingTagWithMultipleExpressionAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator('
'); + $lexer = new Lexer('
'); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 38]), @@ -350,7 +351,7 @@ public function parsesSelfClosingTagWithMultipleExpressionAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -360,7 +361,7 @@ public function parsesSelfClosingTagWithMultipleExpressionAttributes(): void public function parsesTagWithEmptyContentAndWithoutAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 6]), @@ -375,7 +376,7 @@ public function parsesTagWithEmptyContentAndWithoutAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -387,14 +388,14 @@ public function throwsIfClosingTagNameDoesNotMatchOpeningTagName(): void $this->assertThrowsParserException( function () { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); - $tagParser->parse($tokens); + $tagParser->parse($lexer); }, TagCouldNotBeParsed::becauseOfClosingTagNameMismatch( expectedTagName: TagName::from('a'), actualTagName: 'b', - affectedRangeInSource: $this->range([0, 5], [0, 5]) + affectedRangeInSource: $this->range([0, 3], [0, 6]) ) ); } @@ -405,7 +406,7 @@ function () { public function parsesTagWithEmptyContentAndValuelessAttribute(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 10]), @@ -429,7 +430,7 @@ public function parsesTagWithEmptyContentAndValuelessAttribute(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -439,7 +440,7 @@ public function parsesTagWithEmptyContentAndValuelessAttribute(): void public function 
parsesTagWithEmptyContentAndMultipleValuelessAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 18]), @@ -479,7 +480,7 @@ public function parsesTagWithEmptyContentAndMultipleValuelessAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -489,7 +490,7 @@ public function parsesTagWithEmptyContentAndMultipleValuelessAttributes(): void public function parsesTagWithEmptyContentAndStringAttribute(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 24]), @@ -499,13 +500,13 @@ public function parsesTagWithEmptyContentAndStringAttribute(): void ), attributes: new AttributeNodes( new AttributeNode( - rangeInSource: $this->range([0, 7], [0, 14]), + rangeInSource: $this->range([0, 7], [0, 15]), name: new AttributeNameNode( rangeInSource: $this->range([0, 7], [0, 9]), value: AttributeName::from('foo') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 12], [0, 14]), + rangeInSource: $this->range([0, 11], [0, 15]), value: 'bar' ) ), @@ -516,7 +517,7 @@ public function parsesTagWithEmptyContentAndStringAttribute(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -526,7 +527,7 @@ public function parsesTagWithEmptyContentAndStringAttribute(): void public function parsesTagWithEmptyContentAndMultipleStringAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 47]), @@ -536,35 +537,35 @@ public function parsesTagWithEmptyContentAndMultipleStringAttributes(): void ), attributes: new AttributeNodes( new AttributeNode( - 
rangeInSource: $this->range([0, 7], [0, 14]), + rangeInSource: $this->range([0, 7], [0, 15]), name: new AttributeNameNode( rangeInSource: $this->range([0, 7], [0, 9]), value: AttributeName::from('foo') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 12], [0, 14]), + rangeInSource: $this->range([0, 11], [0, 15]), value: 'bar' ) ), new AttributeNode( - rangeInSource: $this->range([0, 17], [0, 24]), + rangeInSource: $this->range([0, 17], [0, 25]), name: new AttributeNameNode( rangeInSource: $this->range([0, 17], [0, 19]), value: AttributeName::from('baz') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 22], [0, 24]), + rangeInSource: $this->range([0, 21], [0, 25]), value: 'qux' ) ), new AttributeNode( - rangeInSource: $this->range([0, 27], [0, 37]), + rangeInSource: $this->range([0, 27], [0, 38]), name: new AttributeNameNode( rangeInSource: $this->range([0, 27], [0, 30]), value: AttributeName::from('quux') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 33], [0, 37]), + rangeInSource: $this->range([0, 32], [0, 38]), value: 'corge' ) ), @@ -575,7 +576,7 @@ public function parsesTagWithEmptyContentAndMultipleStringAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -585,7 +586,7 @@ public function parsesTagWithEmptyContentAndMultipleStringAttributes(): void public function parsesTagWithEmptyContentAndExpressionAttribute(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 24]), @@ -615,7 +616,7 @@ public function parsesTagWithEmptyContentAndExpressionAttribute(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -625,7 +626,7 @@ public function parsesTagWithEmptyContentAndExpressionAttribute(): void public function 
parsesTagWithEmptyContentAndMultipleExpressionAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 47]), @@ -683,7 +684,7 @@ public function parsesTagWithEmptyContentAndMultipleExpressionAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -693,7 +694,7 @@ public function parsesTagWithEmptyContentAndMultipleExpressionAttributes(): void public function parsesTagWithTextContentAndWithoutAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator('Lorem ipsum...'); + $lexer = new Lexer('Lorem ipsum...'); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 20]), @@ -713,7 +714,7 @@ public function parsesTagWithTextContentAndWithoutAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -723,7 +724,7 @@ public function parsesTagWithTextContentAndWithoutAttributes(): void public function parsesTagWithExpressionContentAndWithoutAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator('{someExpression}'); + $lexer = new Lexer('{someExpression}'); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ -746,7 +747,7 @@ public function parsesTagWithExpressionContentAndWithoutAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -756,7 +757,7 @@ public function parsesTagWithExpressionContentAndWithoutAttributes(): void public function parsesTagWithNestedSelfClosingTagContentAndWithoutAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 10]), @@ -782,7 +783,7 @@ 
public function parsesTagWithNestedSelfClosingTagContentAndWithoutAttributes(): $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -792,7 +793,7 @@ public function parsesTagWithNestedSelfClosingTagContentAndWithoutAttributes(): public function parsesTagWithNestedTagAndWithoutAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 13]), @@ -818,7 +819,7 @@ public function parsesTagWithNestedTagAndWithoutAttributes(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -828,7 +829,7 @@ public function parsesTagWithNestedTagAndWithoutAttributes(): void public function parsesTagWithNestedTagsOnMultipleLevelsAndWithoutAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 24]), @@ -876,7 +877,7 @@ public function parsesTagWithNestedTagsOnMultipleLevelsAndWithoutAttributes(): v $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -886,7 +887,7 @@ public function parsesTagWithNestedTagsOnMultipleLevelsAndWithoutAttributes(): v public function parsesTagWithNestedTagInBetweenSpacesAndWithoutAttributes(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(' '); + $lexer = new Lexer(' '); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 19]), @@ -912,7 +913,7 @@ public function parsesTagWithNestedTagInBetweenSpacesAndWithoutAttributes(): voi $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -922,7 +923,7 @@ public function parsesTagWithNestedTagInBetweenSpacesAndWithoutAttributes(): voi public function 
parsesTagWithNestedTagInBetweenTextContentPreservingSpaceAroundTheNestedTag(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator('Something important happened.'); + $lexer = new Lexer('Something important happened.'); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 42]), @@ -961,7 +962,7 @@ public function parsesTagWithNestedTagInBetweenTextContentPreservingSpaceAroundT $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -971,7 +972,7 @@ public function parsesTagWithNestedTagInBetweenTextContentPreservingSpaceAroundT public function parsesTagWithExpressionInBetweenTextContentPreservingSpaceAroundTheExpression(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator('Something {variable} happened.'); + $lexer = new Lexer('Something {variable} happened.'); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 36]), @@ -1002,7 +1003,7 @@ public function parsesTagWithExpressionInBetweenTextContentPreservingSpaceAround $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -1012,7 +1013,7 @@ public function parsesTagWithExpressionInBetweenTextContentPreservingSpaceAround public function parsesTagWithMultipleNestedTagsAsImmediateChildren(): void { $tagParser = TagParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [0, 24]), @@ -1058,7 +1059,7 @@ public function parsesTagWithMultipleNestedTagsAsImmediateChildren(): void $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } @@ -1079,7 +1080,7 @@ public function parsesTagWithMultipleNestedTagsOnMultipleLevelsAllHavingAttribut Some closing text
AFX; - $tokens = $this->createTokenIterator($tagAsString); + $lexer = new Lexer($tagAsString); $expectedTagNode = new TagNode( rangeInSource: $this->range([0, 0], [8, 5]), @@ -1089,13 +1090,13 @@ public function parsesTagWithMultipleNestedTagsOnMultipleLevelsAllHavingAttribut ), attributes: new AttributeNodes( new AttributeNode( - rangeInSource: $this->range([0, 5], [0, 15]), + rangeInSource: $this->range([0, 5], [0, 16]), name: new AttributeNameNode( rangeInSource: $this->range([0, 5], [0, 9]), value: AttributeName::from('class') ), value: new StringLiteralNode( - rangeInSource: $this->range([0, 12], [0, 15]), + rangeInSource: $this->range([0, 11], [0, 16]), value: 'test' ) ), @@ -1136,24 +1137,24 @@ public function parsesTagWithMultipleNestedTagsOnMultipleLevelsAllHavingAttribut ), attributes: new AttributeNodes( new AttributeNode( - rangeInSource: $this->range([3, 7], [3, 23]), + rangeInSource: $this->range([3, 7], [3, 24]), name: new AttributeNameNode( rangeInSource: $this->range([3, 7], [3, 10]), value: AttributeName::from('href') ), value: new StringLiteralNode( - rangeInSource: $this->range([3, 13], [3, 23]), + rangeInSource: $this->range([3, 12], [3, 24]), value: 'about:blank' ) ), new AttributeNode( - rangeInSource: $this->range([3, 26], [3, 39]), + rangeInSource: $this->range([3, 26], [3, 40]), name: new AttributeNameNode( rangeInSource: $this->range([3, 26], [3, 31]), value: AttributeName::from('target') ), value: new StringLiteralNode( - rangeInSource: $this->range([3, 34], [3, 39]), + rangeInSource: $this->range([3, 33], [3, 40]), value: '_blank' ) ), @@ -1255,7 +1256,7 @@ public function parsesTagWithMultipleNestedTagsOnMultipleLevelsAllHavingAttribut $this->assertEquals( $expectedTagNode, - $tagParser->parse($tokens) + $tagParser->parse($lexer) ); } } diff --git a/test/Unit/Language/Parser/TemplateLiteral/TemplateLiteralParserTest.php b/test/Unit/Language/Parser/TemplateLiteral/TemplateLiteralParserTest.php index d5b2c65..af14f6a 100644 --- 
a/test/Unit/Language/Parser/TemplateLiteral/TemplateLiteralParserTest.php +++ b/test/Unit/Language/Parser/TemplateLiteral/TemplateLiteralParserTest.php @@ -28,11 +28,14 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Expression\ExpressionNode; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralExpressionSegmentNode; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralLine; +use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralLines; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralNode; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralSegments; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralStringSegmentNode; use PackageFactory\ComponentEngine\Language\AST\Node\TernaryOperation\TernaryOperationNode; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\TemplateLiteral\TemplateLiteralParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -41,45 +44,48 @@ final class TemplateLiteralParserTest extends ParserTestCase /** * @test */ - public function parsesTemplateLiteralWithoutEmbeddedExpressions(): void + public function parsesEmptyTemplateLiteral(): void { $templateLiteralParser = TemplateLiteralParser::singleton(); - $tokens = $this->createTokenIterator('`Hello World`'); + $lexer = new Lexer(<<range([0, 0], [0, 12]), - segments: new TemplateLiteralSegments( - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 1], [0, 11]), - value: 'Hello World' - ) - ) + rangeInSource: $this->range([0, 0], [1, 2]), + indentation: 0, + lines: new TemplateLiteralLines() ); $this->assertEquals( 
$expectedTemplateLiteralNode, - $templateLiteralParser->parse($tokens) + $templateLiteralParser->parse($lexer) ); } /** * @test */ - public function parsesTemplateLiteralWithOnlyEmbeddedExpression(): void + public function parsesTemplateLiteralWithoutEmbeddedExpressions(): void { $templateLiteralParser = TemplateLiteralParser::singleton(); - $tokens = $this->createTokenIterator('`${foo}`'); + $lexer = new Lexer(<<range([0, 0], [0, 7]), - segments: new TemplateLiteralSegments( - new TemplateLiteralExpressionSegmentNode( - rangeInSource: $this->range([0, 1], [0, 6]), - expression: new ExpressionNode( - rangeInSource: $this->range([0, 3], [0, 5]), - root: new ValueReferenceNode( - rangeInSource: $this->range([0, 3], [0, 5]), - name: VariableName::from('foo') + rangeInSource: $this->range([0, 0], [2, 2]), + indentation: 0, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([1, 0], [1, 10]), + value: 'Hello World' ) ) ) @@ -88,177 +94,343 @@ public function parsesTemplateLiteralWithOnlyEmbeddedExpression(): void $this->assertEquals( $expectedTemplateLiteralNode, - $templateLiteralParser->parse($tokens) + $templateLiteralParser->parse($lexer) ); } /** * @test */ - public function parsesTemplateLiteralWithLeadingAndTrailingStringSegments(): void + public function parsesTemplateLiteralWhileCapturingFinalAndLineIndentation(): void { $templateLiteralParser = TemplateLiteralParser::singleton(); - $tokens = $this->createTokenIterator('`Hello ${friend}!`'); + $lexer = new Lexer(<<range([0, 0], [0, 17]), - segments: new TemplateLiteralSegments( - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 1], [0, 6]), - value: 'Hello ' - ), - new TemplateLiteralExpressionSegmentNode( - rangeInSource: $this->range([0, 7], [0, 15]), - expression: new ExpressionNode( - rangeInSource: $this->range([0, 9], [0, 14]), - root: new 
ValueReferenceNode( - rangeInSource: $this->range([0, 9], [0, 14]), - name: VariableName::from('friend') + rangeInSource: $this->range([0, 0], [4, 6]), + indentation: 4, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 4, + segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([1, 4], [1, 14]), + value: 'Hello World' ) ) ), - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 16], [0, 16]), - value: '!' + new TemplateLiteralLine( + indentation: 8, + segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([2, 8], [2, 18]), + value: 'Hello World' + ) + ) ), + new TemplateLiteralLine( + indentation: 10, + segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([3, 10], [3, 20]), + value: 'Hello World' + ) + ) + ) ) ); $this->assertEquals( $expectedTemplateLiteralNode, - $templateLiteralParser->parse($tokens) + $templateLiteralParser->parse($lexer) ); } /** * @test */ - public function parsesTemplateLiteralWithLeadingAndTrailingExpressionSegments(): void + public function parsesTemplateLiteralWithEmptyLines(): void { $templateLiteralParser = TemplateLiteralParser::singleton(); - $tokens = $this->createTokenIterator('`${greeting} to you, ${friend}`'); + $lexer = new Lexer(<<range([0, 0], [0, 30]), - segments: new TemplateLiteralSegments( - new TemplateLiteralExpressionSegmentNode( - rangeInSource: $this->range([0, 1], [0, 11]), - expression: new ExpressionNode( - rangeInSource: $this->range([0, 3], [0, 10]), - root: new ValueReferenceNode( - rangeInSource: $this->range([0, 3], [0, 10]), - name: VariableName::from('greeting') + rangeInSource: $this->range([0, 0], [4, 2]), + indentation: 0, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments() + ), + new TemplateLiteralLine( + indentation: 0, + 
segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([2, 0], [2, 10]), + value: 'Hello World' ) ) ), - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 12], [0, 20]), - value: ' to you, ' - ), - new TemplateLiteralExpressionSegmentNode( - rangeInSource: $this->range([0, 21], [0, 29]), - expression: new ExpressionNode( - rangeInSource: $this->range([0, 23], [0, 28]), - root: new ValueReferenceNode( - rangeInSource: $this->range([0, 23], [0, 28]), - name: VariableName::from('friend') + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments() + ) + ) + ); + + $this->assertEquals( + $expectedTemplateLiteralNode, + $templateLiteralParser->parse($lexer) + ); + } + + /** + * @test + */ + public function parsesTemplateLiteralWithOnlyEmbeddedExpression(): void + { + $templateLiteralParser = TemplateLiteralParser::singleton(); + $lexer = new Lexer(<<range([0, 0], [2, 2]), + indentation: 0, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments( + new TemplateLiteralExpressionSegmentNode( + rangeInSource: $this->range([1, 0], [1, 4]), + expression: new ExpressionNode( + rangeInSource: $this->range([1, 1], [1, 3]), + root: new ValueReferenceNode( + rangeInSource: $this->range([1, 1], [1, 3]), + name: VariableName::from('foo') + ) + ) ) ) - ), + ) ) ); $this->assertEquals( $expectedTemplateLiteralNode, - $templateLiteralParser->parse($tokens) + $templateLiteralParser->parse($lexer) ); } /** * @test */ - public function parsesTemplateLiteralWithComplexExpression(): void + public function parsesTemplateLiteralWithLeadingAndTrailingStringSegments(): void { $templateLiteralParser = TemplateLiteralParser::singleton(); - $tokens = $this->createTokenIterator( - '`The result is: ${a < b ? "yes" : (foo ? 
"maybe" : "no")}`' + $lexer = new Lexer(<<range([0, 0], [2, 2]), + indentation: 0, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([1, 0], [1, 5]), + value: 'Hello ' + ), + new TemplateLiteralExpressionSegmentNode( + rangeInSource: $this->range([1, 6], [1, 13]), + expression: new ExpressionNode( + rangeInSource: $this->range([1, 7], [1, 12]), + root: new ValueReferenceNode( + rangeInSource: $this->range([1, 7], [1, 12]), + name: VariableName::from('friend') + ) + ) + ), + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([1, 14], [1, 14]), + value: '!' + ), + ) + ) + ) ); + $this->assertEquals( + $expectedTemplateLiteralNode, + $templateLiteralParser->parse($lexer) + ); + } + + /** + * @test + */ + public function parsesTemplateLiteralWithLeadingAndTrailingExpressionSegments(): void + { + $templateLiteralParser = TemplateLiteralParser::singleton(); + $lexer = new Lexer(<<range([0, 0], [0, 57]), - segments: new TemplateLiteralSegments( - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 1], [0, 15]), - value: 'The result is: ' - ), - new TemplateLiteralExpressionSegmentNode( - rangeInSource: $this->range([0, 16], [0, 56]), - expression: new ExpressionNode( - rangeInSource: $this->range([0, 18], [0, 55]), - root: new TernaryOperationNode( - condition: new ExpressionNode( - rangeInSource: $this->range([0, 18], [0, 22]), - root: new BinaryOperationNode( - rangeInSource: $this->range([0, 18], [0, 22]), - leftOperand: new ExpressionNode( - rangeInSource: $this->range([0, 18], [0, 18]), - root: new ValueReferenceNode( - rangeInSource: $this->range([0, 18], [0, 18]), - name: VariableName::from('a') - ) - ), - operator: BinaryOperator::LESS_THAN, - rightOperand: new ExpressionNode( - rangeInSource: $this->range([0, 22], [0, 22]), - root: new ValueReferenceNode( - rangeInSource: $this->range([0, 
22], [0, 22]), - name: VariableName::from('b') - ) - ), + rangeInSource: $this->range([0, 0], [2, 2]), + indentation: 0, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments( + new TemplateLiteralExpressionSegmentNode( + rangeInSource: $this->range([1, 0], [1, 9]), + expression: new ExpressionNode( + rangeInSource: $this->range([1, 1], [1, 8]), + root: new ValueReferenceNode( + rangeInSource: $this->range([1, 1], [1, 8]), + name: VariableName::from('greeting') ) - ), - trueBranch: new ExpressionNode( - rangeInSource: $this->range([0, 27], [0, 29]), - root: new StringLiteralNode( - rangeInSource: $this->range([0, 27], [0, 29]), - value: 'yes' + ) + ), + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([1, 10], [1, 18]), + value: ' to you, ' + ), + new TemplateLiteralExpressionSegmentNode( + rangeInSource: $this->range([1, 19], [1, 26]), + expression: new ExpressionNode( + rangeInSource: $this->range([1, 20], [1, 25]), + root: new ValueReferenceNode( + rangeInSource: $this->range([1, 20], [1, 25]), + name: VariableName::from('friend') ) - ), - falseBranch: new ExpressionNode( - rangeInSource: $this->range([0, 34], [0, 55]), + ) + ), + ) + ) + ) + ); + + $this->assertEquals( + $expectedTemplateLiteralNode, + $templateLiteralParser->parse($lexer) + ); + } + + /** + * @test + */ + public function parsesTemplateLiteralWithComplexExpression(): void + { + $templateLiteralParser = TemplateLiteralParser::singleton(); + $lexer = new Lexer(<<range([0, 0], [2, 2]), + indentation: 0, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([1, 0], [1, 14]), + value: 'The result is: ' + ), + new TemplateLiteralExpressionSegmentNode( + rangeInSource: $this->range([1, 15], [1, 56]), + expression: new ExpressionNode( + rangeInSource: $this->range([1, 16], [1, 55]), 
root: new TernaryOperationNode( condition: new ExpressionNode( - rangeInSource: $this->range([0, 35], [0, 37]), - root: new ValueReferenceNode( - rangeInSource: $this->range([0, 35], [0, 37]), - name: VariableName::from('foo') - ), + rangeInSource: $this->range([1, 16], [1, 20]), + root: new BinaryOperationNode( + rangeInSource: $this->range([1, 16], [1, 20]), + leftOperand: new ExpressionNode( + rangeInSource: $this->range([1, 16], [1, 16]), + root: new ValueReferenceNode( + rangeInSource: $this->range([1, 16], [1, 16]), + name: VariableName::from('a') + ) + ), + operator: BinaryOperator::LESS_THAN, + rightOperand: new ExpressionNode( + rangeInSource: $this->range([1, 20], [1, 20]), + root: new ValueReferenceNode( + rangeInSource: $this->range([1, 20], [1, 20]), + name: VariableName::from('b') + ) + ), + ) ), trueBranch: new ExpressionNode( - rangeInSource: $this->range([0, 42], [0, 46]), + rangeInSource: $this->range([1, 24], [1, 28]), root: new StringLiteralNode( - rangeInSource: $this->range([0, 42], [0, 46]), - value: 'maybe' + rangeInSource: $this->range([1, 24], [1, 28]), + value: 'yes' ) ), falseBranch: new ExpressionNode( - rangeInSource: $this->range([0, 52], [0, 53]), - root: new StringLiteralNode( - rangeInSource: $this->range([0, 52], [0, 53]), - value: 'no' + rangeInSource: $this->range([1, 32], [1, 55]), + root: new TernaryOperationNode( + condition: new ExpressionNode( + rangeInSource: $this->range([1, 33], [1, 35]), + root: new ValueReferenceNode( + rangeInSource: $this->range([1, 33], [1, 35]), + name: VariableName::from('foo') + ), + ), + trueBranch: new ExpressionNode( + rangeInSource: $this->range([1, 39], [1, 47]), + root: new StringLiteralNode( + rangeInSource: $this->range([1, 39], [1, 47]), + value: 'perhaps' + ) + ), + falseBranch: new ExpressionNode( + rangeInSource: $this->range([1, 51], [1, 54]), + root: new StringLiteralNode( + rangeInSource: $this->range([1, 51], [1, 54]), + value: 'no' + ) + ) ) ) ) ) - ) + ), ) - ), + ) ) ); 
$this->assertEquals( $expectedTemplateLiteralNode, - $templateLiteralParser->parse($tokens) + $templateLiteralParser->parse($lexer) ); } @@ -268,80 +440,72 @@ public function parsesTemplateLiteralWithComplexExpression(): void public function parsesTemplateLiteralWithEmbeddedTemplateLiteral(): void { $templateLiteralParser = TemplateLiteralParser::singleton(); - $tokens = $this->createTokenIterator('`Lorem ${`ipsum ${foo} sit`} amet`'); + $lexer = new Lexer(<<range([0, 0], [0, 33]), - segments: new TemplateLiteralSegments( - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 1], [0, 6]), - value: 'Lorem ' - ), - new TemplateLiteralExpressionSegmentNode( - rangeInSource: $this->range([0, 7], [0, 27]), - expression: new ExpressionNode( - rangeInSource: $this->range([0, 9], [0, 26]), - root: new TemplateLiteralNode( - rangeInSource: $this->range([0, 9], [0, 26]), - segments: new TemplateLiteralSegments( - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 10], [0, 15]), - value: 'ipsum ' - ), - new TemplateLiteralExpressionSegmentNode( - rangeInSource: $this->range([0, 16], [0, 21]), - expression: new ExpressionNode( - rangeInSource: $this->range([0, 18], [0, 20]), - root: new ValueReferenceNode( - rangeInSource: $this->range([0, 18], [0, 20]), - name: VariableName::from('foo') + rangeInSource: $this->range([0, 0], [4, 2]), + indentation: 0, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 0, + segments: new TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([1, 0], [1, 5]), + value: 'Lorem ' + ), + new TemplateLiteralExpressionSegmentNode( + rangeInSource: $this->range([1, 6], [3, 7]), + expression: new ExpressionNode( + rangeInSource: $this->range([1, 7], [3, 6]), + root: new TemplateLiteralNode( + rangeInSource: $this->range([1, 7], [3, 6]), + indentation: 4, + lines: new TemplateLiteralLines( + new TemplateLiteralLine( + indentation: 4, + segments: new 
TemplateLiteralSegments( + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([2, 4], [2, 9]), + value: 'ipsum ' + ), + new TemplateLiteralExpressionSegmentNode( + rangeInSource: $this->range([2, 10], [2, 14]), + expression: new ExpressionNode( + rangeInSource: $this->range([2, 11], [2, 13]), + root: new ValueReferenceNode( + rangeInSource: $this->range([2, 11], [2, 13]), + name: VariableName::from('foo') + ) + ) + ), + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([2, 15], [2, 18]), + value: ' sit' + ) + ) ) ) - ), - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 22], [0, 25]), - value: ' sit' ) ) + ), + new TemplateLiteralStringSegmentNode( + rangeInSource: $this->range([3, 8], [3, 12]), + value: ' amet' ) ) - ), - new TemplateLiteralStringSegmentNode( - rangeInSource: $this->range([0, 28], [0, 32]), - value: ' amet' ) ) ); $this->assertEquals( $expectedTemplateLiteralNode, - $templateLiteralParser->parse($tokens) + $templateLiteralParser->parse($lexer) ); } - - /** - * @test - */ - public function toleratesIsolatedDollarSigns(): void - { - $this->markTestSkipped('@TODO: This will require significant redesign of the tokenizer.'); - - // $templateLiteralParser = TemplateLiteralParser::singleton(); - // $tokens = $this->createTokenIterator('`$$$$$$$$`'); - - // $expectedTemplateLiteralNode = new TemplateLiteralNode( - // rangeInSource: $this->range([0, 0], [0, 9]), - // segments: new TemplateLiteralSegments( - // new TemplateLiteralStringSegmentNode( - // rangeInSource: $this->range([0, 1], [0, 8]), - // value: '$$$$$$$$' - // ) - // ) - // ); - - // $this->assertEquals( - // $expectedTemplateLiteralNode, - // $templateLiteralParser->parse($tokens) - // ); - } } diff --git a/test/Unit/Language/Parser/Text/TextParserTest.php b/test/Unit/Language/Parser/Text/TextParserTest.php index 04edcbe..819be87 100644 --- a/test/Unit/Language/Parser/Text/TextParserTest.php +++ 
b/test/Unit/Language/Parser/Text/TextParserTest.php @@ -23,6 +23,7 @@ namespace PackageFactory\ComponentEngine\Test\Unit\Language\Parser\Text; use PackageFactory\ComponentEngine\Language\AST\Node\Text\TextNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\Text\TextParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -34,10 +35,10 @@ final class TextParserTest extends ParserTestCase public function parsesEmptyStringToNull(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(''); + $lexer = new Lexer(''); $this->assertNull( - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -47,9 +48,9 @@ public function parsesEmptyStringToNull(): void public function parsesTextWithSpacesOnlyToNull(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(" \t \n \t "); + $lexer = new Lexer(" \t \n \t "); - $this->assertNull($textParser->parse($tokens)); + $this->assertNull($textParser->parse($lexer)); } /** @@ -58,7 +59,7 @@ public function parsesTextWithSpacesOnlyToNull(): void public function parsesTrivialText(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator('Hello World'); + $lexer = new Lexer('Hello World'); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [0, 10]), @@ -67,7 +68,7 @@ public function parsesTrivialText(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -77,7 +78,7 @@ public function parsesTrivialText(): void public function trimsLeadingAndTrailingSpaces(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(" \t\t Hello World \t\t "); + $lexer = new Lexer(" \t\t Hello World \t\t "); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ -86,7 +87,7 @@ public function trimsLeadingAndTrailingSpaces(): void 
$this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -96,7 +97,7 @@ public function trimsLeadingAndTrailingSpaces(): void public function trimsLeadingLineBreak(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator("\nHello World"); + $lexer = new Lexer("\nHello World"); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [1, 10]), @@ -105,7 +106,7 @@ public function trimsLeadingLineBreak(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -115,7 +116,7 @@ public function trimsLeadingLineBreak(): void public function trimsLeadingLineBreakAndIndentation(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator("\n Hello World"); + $lexer = new Lexer("\n Hello World"); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [1, 14]), @@ -124,7 +125,7 @@ public function trimsLeadingLineBreakAndIndentation(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -134,7 +135,7 @@ public function trimsLeadingLineBreakAndIndentation(): void public function preservesLeadingSpaceIfFlagIsSet(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(" \t\t Hello World \t\t "); + $lexer = new Lexer(" \t\t Hello World \t\t "); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [0, 22]), @@ -143,7 +144,7 @@ public function preservesLeadingSpaceIfFlagIsSet(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens, true) + $textParser->parse($lexer, true) ); } @@ -153,7 +154,7 @@ public function preservesLeadingSpaceIfFlagIsSet(): void public function reducesInnerSpacesToSingleSpaceCharacterEach(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator("Hello \t \n \t folks and\t\t\tpeople"); + $lexer = new Lexer("Hello 
\t \n \t folks and\t\t\tpeople"); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [1, 22]), @@ -162,7 +163,7 @@ public function reducesInnerSpacesToSingleSpaceCharacterEach(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -172,7 +173,7 @@ public function reducesInnerSpacesToSingleSpaceCharacterEach(): void public function terminatesAtEmbeddedExpressionAndTrimsLeadingSpace(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(" Hello{"); + $lexer = new Lexer(" Hello{"); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [0, 8]), @@ -181,7 +182,7 @@ public function terminatesAtEmbeddedExpressionAndTrimsLeadingSpace(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -191,7 +192,7 @@ public function terminatesAtEmbeddedExpressionAndTrimsLeadingSpace(): void public function terminatesAtEmbeddedExpressionAndKeepsTrailingSpace(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator("Hello \t {foo}!"); + $lexer = new Lexer("Hello \t {foo}!"); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [0, 7]), @@ -200,7 +201,7 @@ public function terminatesAtEmbeddedExpressionAndKeepsTrailingSpace(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -210,7 +211,7 @@ public function terminatesAtEmbeddedExpressionAndKeepsTrailingSpace(): void public function terminatesAtEmbeddedExpressionAndTrimsTrailingSpaceIfItContainsLineBreaks(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator("Hello \n\t {foo}!"); + $lexer = new Lexer("Hello \n\t {foo}!"); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [1, 1]), @@ -219,7 +220,7 @@ public function terminatesAtEmbeddedExpressionAndTrimsTrailingSpaceIfItContainsL 
$this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -229,9 +230,9 @@ public function terminatesAtEmbeddedExpressionAndTrimsTrailingSpaceIfItContainsL public function returnsNullAtEmbeddedExpressionIfTheresOnlySpace(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(" \n\t {foo}!"); + $lexer = new Lexer(" \n\t {foo}!"); - $this->assertNull($textParser->parse($tokens)); + $this->assertNull($textParser->parse($lexer)); } /** @@ -240,7 +241,7 @@ public function returnsNullAtEmbeddedExpressionIfTheresOnlySpace(): void public function terminatesAtOpeningTagAndTrimsLeadingSpace(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(" Hello"); + $lexer = new Lexer(" Hello"); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [0, 8]), @@ -249,7 +250,7 @@ public function terminatesAtOpeningTagAndTrimsLeadingSpace(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -259,7 +260,7 @@ public function terminatesAtOpeningTagAndTrimsLeadingSpace(): void public function terminatesAtOpeningTagAndKeepsTrailingSpace(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator("Hello \t World"); + $lexer = new Lexer("Hello \t World"); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [0, 7]), @@ -268,7 +269,7 @@ public function terminatesAtOpeningTagAndKeepsTrailingSpace(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -278,7 +279,7 @@ public function terminatesAtOpeningTagAndKeepsTrailingSpace(): void public function terminatesAtOpeningTagAndTrimsTrailingSpaceIfItContainsLineBreaks(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator("Hello \n\t World"); + $lexer = new Lexer("Hello \n\t World"); $expectedTextNode = new TextNode( 
rangeInSource: $this->range([0, 0], [1, 1]), @@ -287,7 +288,7 @@ public function terminatesAtOpeningTagAndTrimsTrailingSpaceIfItContainsLineBreak $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -297,9 +298,9 @@ public function terminatesAtOpeningTagAndTrimsTrailingSpaceIfItContainsLineBreak public function returnsNullAtOpeningTagIfTheresOnlySpace(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(" \n\t "); + $lexer = new Lexer(" \n\t "); - $this->assertNull($textParser->parse($tokens)); + $this->assertNull($textParser->parse($lexer)); } /** @@ -308,7 +309,7 @@ public function returnsNullAtOpeningTagIfTheresOnlySpace(): void public function terminatesAtClosingTagAndTrimsTrailingSpace(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator("World \n\t "); + $lexer = new Lexer("World \n\t "); $expectedTextNode = new TextNode( rangeInSource: $this->range([0, 0], [1, 1]), @@ -317,7 +318,7 @@ public function terminatesAtClosingTagAndTrimsTrailingSpace(): void $this->assertEquals( $expectedTextNode, - $textParser->parse($tokens) + $textParser->parse($lexer) ); } @@ -327,8 +328,8 @@ public function terminatesAtClosingTagAndTrimsTrailingSpace(): void public function returnsNullAtClosingTagIfTheresOnlySpace(): void { $textParser = TextParser::singleton(); - $tokens = $this->createTokenIterator(" \n\t "); + $lexer = new Lexer(" \n\t "); - $this->assertNull($textParser->parse($tokens)); + $this->assertNull($textParser->parse($lexer)); } } diff --git a/test/Unit/Language/Parser/TypeReference/TypeReferenceParserTest.php b/test/Unit/Language/Parser/TypeReference/TypeReferenceParserTest.php index d0f9571..1881350 100644 --- a/test/Unit/Language/Parser/TypeReference/TypeReferenceParserTest.php +++ b/test/Unit/Language/Parser/TypeReference/TypeReferenceParserTest.php @@ -29,6 +29,7 @@ use 
PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNode; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\TypeReference\TypeReferenceParser; use PackageFactory\ComponentEngine\Language\Parser\ParserException; use PackageFactory\ComponentEngine\Language\Parser\TypeReference\TypeReferenceCouldNotBeParsed; @@ -42,7 +43,7 @@ final class TypeReferenceParserTest extends ParserTestCase public function parsesSimpleTypeReference(): void { $typeReferenceParser = TypeReferenceParser::singleton(); - $tokens = $this->createTokenIterator('Foo'); + $lexer = new Lexer('Foo'); $expectedTypeReferenceNode = new TypeReferenceNode( rangeInSource: $this->range([0, 0], [0, 2]), @@ -58,7 +59,7 @@ public function parsesSimpleTypeReference(): void $this->assertEquals( $expectedTypeReferenceNode, - $typeReferenceParser->parse($tokens) + $typeReferenceParser->parse($lexer) ); } @@ -68,7 +69,7 @@ public function parsesSimpleTypeReference(): void public function parsesArrayTypeReference(): void { $typeReferenceParser = TypeReferenceParser::singleton(); - $tokens = $this->createTokenIterator('Foo[]'); + $lexer = new Lexer('Foo[]'); $expectedTypeReferenceNode = new TypeReferenceNode( rangeInSource: $this->range([0, 0], [0, 4]), @@ -84,7 +85,7 @@ public function parsesArrayTypeReference(): void $this->assertEquals( $expectedTypeReferenceNode, - $typeReferenceParser->parse($tokens) + $typeReferenceParser->parse($lexer) ); } @@ -94,7 +95,7 @@ public function parsesArrayTypeReference(): void public function parsesOptionalTypeReference(): void { $typeReferenceParser = TypeReferenceParser::singleton(); - $tokens = $this->createTokenIterator('?Foo'); + $lexer = new Lexer('?Foo'); $expectedTypeReferenceNode = new TypeReferenceNode( rangeInSource: 
$this->range([0, 0], [0, 3]), @@ -110,7 +111,7 @@ public function parsesOptionalTypeReference(): void $this->assertEquals( $expectedTypeReferenceNode, - $typeReferenceParser->parse($tokens) + $typeReferenceParser->parse($lexer) ); } @@ -120,7 +121,7 @@ public function parsesOptionalTypeReference(): void public function parsesUnionTypeReference(): void { $typeReferenceParser = TypeReferenceParser::singleton(); - $tokens = $this->createTokenIterator('Foo|Bar|Baz'); + $lexer = new Lexer('Foo|Bar|Baz'); $expectedTypeReferenceNode = new TypeReferenceNode( rangeInSource: $this->range([0, 0], [0, 10]), @@ -144,7 +145,7 @@ public function parsesUnionTypeReference(): void $this->assertEquals( $expectedTypeReferenceNode, - $typeReferenceParser->parse($tokens) + $typeReferenceParser->parse($lexer) ); } @@ -154,7 +155,7 @@ public function parsesUnionTypeReference(): void public function throwsIfInvalidTypeReferenceOccurs(): void { $typeReferenceParser = TypeReferenceParser::singleton(); - $tokens = $this->createTokenIterator('?Foo[]'); + $lexer = new Lexer('?Foo[]'); $this->expectException(ParserException::class); $this->expectExceptionObject( @@ -166,7 +167,7 @@ public function throwsIfInvalidTypeReferenceOccurs(): void ) ); - $typeReferenceParser->parse($tokens); + $typeReferenceParser->parse($lexer); } /** @@ -175,7 +176,7 @@ public function throwsIfInvalidTypeReferenceOccurs(): void public function throwsIfDuplicatesOccurInUnionTypeReference(): void { $typeReferenceParser = TypeReferenceParser::singleton(); - $tokens = $this->createTokenIterator('Foo|Bar|Foo|Baz'); + $lexer = new Lexer('Foo|Bar|Foo|Baz'); $this->expectException(ParserException::class); $this->expectExceptionObject( @@ -189,6 +190,6 @@ public function throwsIfDuplicatesOccurInUnionTypeReference(): void ) ); - $typeReferenceParser->parse($tokens); + $typeReferenceParser->parse($lexer); } } diff --git a/test/Unit/Language/Parser/ValueReference/ValueReferenceParserTest.php 
b/test/Unit/Language/Parser/ValueReference/ValueReferenceParserTest.php index a73e70d..7b13c44 100644 --- a/test/Unit/Language/Parser/ValueReference/ValueReferenceParserTest.php +++ b/test/Unit/Language/Parser/ValueReference/ValueReferenceParserTest.php @@ -24,6 +24,7 @@ use PackageFactory\ComponentEngine\Domain\VariableName\VariableName; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; +use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Parser\ValueReference\ValueReferenceParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -35,7 +36,7 @@ final class ValueReferenceParserTest extends ParserTestCase public function parsesValueReference(): void { $valueReferenceParser = ValueReferenceParser::singleton(); - $tokens = $this->createTokenIterator('foo'); + $lexer = new Lexer('foo'); $expectedValueReferenceNode = new ValueReferenceNode( rangeInSource: $this->range([0, 0], [0, 2]), @@ -44,7 +45,7 @@ public function parsesValueReference(): void $this->assertEquals( $expectedValueReferenceNode, - $valueReferenceParser->parse($tokens) + $valueReferenceParser->parse($lexer) ); } } diff --git a/test/Unit/Target/Php/Transpiler/IntegerLiteral/IntegerLiteralTranspilerTest.php b/test/Unit/Target/Php/Transpiler/IntegerLiteral/IntegerLiteralTranspilerTest.php index b44c24e..5b3bcde 100644 --- a/test/Unit/Target/Php/Transpiler/IntegerLiteral/IntegerLiteralTranspilerTest.php +++ b/test/Unit/Target/Php/Transpiler/IntegerLiteral/IntegerLiteralTranspilerTest.php @@ -42,7 +42,6 @@ public static function integerLiteralExamples(): array // Binary ' 0b10000000000000000000000000000000 ' => ['0b10000000000000000000000000000000', '0b10000000000000000000000000000000'], ' 0b01111111100000000000000000000000 ' => ['0b01111111100000000000000000000000', '0b01111111100000000000000000000000'], - ' 0B00000000011111111111111111111111 ' => ['0B00000000011111111111111111111111', 
'0b00000000011111111111111111111111'], // Octal ' 0o755 ' => ['0o755', '0o755'], diff --git a/test/Unit/Target/Php/Transpiler/TemplateLiteral/TemplateLiteralTranspilerTest.php b/test/Unit/Target/Php/Transpiler/TemplateLiteral/TemplateLiteralTranspilerTest.php index fbe8bda..0ed1bb7 100644 --- a/test/Unit/Target/Php/Transpiler/TemplateLiteral/TemplateLiteralTranspilerTest.php +++ b/test/Unit/Target/Php/Transpiler/TemplateLiteral/TemplateLiteralTranspilerTest.php @@ -30,32 +30,39 @@ final class TemplateLiteralTranspilerTest extends TestCase { /** - * @return array + * @return iterable */ - public static function templateLiteralExamples(): array + public static function templateLiteralExamples(): iterable { - return [ - '`Hello World`' => [ - '`Hello World`', - '\'Hello World\'' - ], - '`Hello ${name}`' => [ - '`Hello ${name}`', - '\'Hello \' . $this->name' - ], - '`${greeting} World`' => [ - '`${greeting} World`', - '$this->greeting . \' World\'' - ], - '`Hello ${name}! How are you?`' => [ - '`Hello ${name}! How are you?`', - '\'Hello \' . $this->name . \'! How are you?\'' - ], - '`Hello ${name}! ${question}?`' => [ - '`Hello ${name}! ${question}?`', - '\'Hello \' . $this->name . \'! \' . $this->question . \'?\'' - ], - ]; + yield $source = << [$source, '\'Hello World\'']; + + yield $source = << [$source, '\'Hello \' . $this->name']; + + yield $source = << [$source, '$this->greeting . \' World\'']; + + yield $source = << [$source, '\'Hello \' . $this->name . \'! How are you?\'']; + + yield $source = << [$source, '\'Hello \' . $this->name . \'! \' . $this->question . 
\'?\'']; } /** diff --git a/test/Unit/TypeSystem/Resolver/Expression/ExpressionTypeResolverTest.php b/test/Unit/TypeSystem/Resolver/Expression/ExpressionTypeResolverTest.php index e5d7f9d..107156b 100644 --- a/test/Unit/TypeSystem/Resolver/Expression/ExpressionTypeResolverTest.php +++ b/test/Unit/TypeSystem/Resolver/Expression/ExpressionTypeResolverTest.php @@ -246,16 +246,37 @@ public function resolvesTagToStringType(): void } /** - * @return array + * @return iterable */ - public static function templateLiteralExamples(): array + public static function templateLiteralExamples(): iterable { - return [ - '`Hello World`' => ['`Hello World`'], - '`Hello ${name}`' => ['`Hello ${name}`'], - '`${greeting} World`' => ['`${greeting} World`'], - '`Hello ${name}! How are you?`' => ['`Hello ${name}! How are you?`'], - ]; + $source = << [$source]; + + $source = << [$source]; + + $source = << [$source]; + + $source = << [$source]; } /** diff --git a/test/Unit/TypeSystem/Resolver/TemplateLiteral/TemplateLiteralTypeResolverTest.php b/test/Unit/TypeSystem/Resolver/TemplateLiteral/TemplateLiteralTypeResolverTest.php index 2913709..9bf871b 100644 --- a/test/Unit/TypeSystem/Resolver/TemplateLiteral/TemplateLiteralTypeResolverTest.php +++ b/test/Unit/TypeSystem/Resolver/TemplateLiteral/TemplateLiteralTypeResolverTest.php @@ -30,16 +30,37 @@ final class TemplateLiteralTypeResolverTest extends TestCase { /** - * @return array + * @return iterable */ - public static function templateLiteralExamples(): array + public static function templateLiteralExamples(): iterable { - return [ - '`Hello World`' => ['`Hello World`'], - '`Hello ${name}`' => ['`Hello ${name}`'], - '`${greeting} World`' => ['`${greeting} World`'], - '`Hello ${name}! How are you?`' => ['`Hello ${name}! 
How are you?`'], - ]; + $source = << [$source]; + + $source = << [$source]; + + $source = << [$source]; + + $source = << [$source]; } /** From 66e0e7c75502d8fdbbe97c3d631550faea772845 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 11 Aug 2023 13:24:07 +0200 Subject: [PATCH 05/19] TASK: Remove Tokenizer and all related obsolete concepts --- .../AST/Node/IntegerLiteral/IntegerFormat.php | 2 - .../Parser/Module/ModuleCouldNotBeParsed.php | 1 - .../StructDeclarationParser.php | 2 - .../Parser/Tag/TagCouldNotBeParsed.php | 22 +- src/Parser/Source/Fragment.php | 63 --- src/Parser/Source/Source.php | 33 +- src/Parser/Tokenizer/Buffer.php | 62 --- src/Parser/Tokenizer/CharacterType.php | 68 --- src/Parser/Tokenizer/LookAhead.php | 80 --- src/Parser/Tokenizer/Scanner.php | 190 ------- src/Parser/Tokenizer/Token.php | 73 --- src/Parser/Tokenizer/TokenType.php | 226 -------- src/Parser/Tokenizer/TokenTypes.php | 66 --- src/Parser/Tokenizer/Tokenizer.php | 484 ------------------ .../PhpTranspilerIntegrationTest.php | 1 - test/Unit/Language/Parser/ParserTestCase.php | 15 - test/Unit/Parser/Tokenizer/Fixtures.php | 42 -- test/Unit/Parser/Tokenizer/TokenTest.php | 55 -- test/Unit/Parser/Tokenizer/TokenTypesTest.php | 110 ---- test/Unit/Parser/Tokenizer/TokenizerTest.php | 168 ------ 20 files changed, 4 insertions(+), 1759 deletions(-) delete mode 100644 src/Parser/Source/Fragment.php delete mode 100644 src/Parser/Tokenizer/Buffer.php delete mode 100644 src/Parser/Tokenizer/CharacterType.php delete mode 100644 src/Parser/Tokenizer/LookAhead.php delete mode 100644 src/Parser/Tokenizer/Scanner.php delete mode 100644 src/Parser/Tokenizer/Token.php delete mode 100644 src/Parser/Tokenizer/TokenType.php delete mode 100644 src/Parser/Tokenizer/TokenTypes.php delete mode 100644 src/Parser/Tokenizer/Tokenizer.php delete mode 100644 test/Unit/Parser/Tokenizer/Fixtures.php delete mode 100644 test/Unit/Parser/Tokenizer/TokenTest.php delete mode 100644 
test/Unit/Parser/Tokenizer/TokenTypesTest.php delete mode 100644 test/Unit/Parser/Tokenizer/TokenizerTest.php diff --git a/src/Language/AST/Node/IntegerLiteral/IntegerFormat.php b/src/Language/AST/Node/IntegerLiteral/IntegerFormat.php index 445ffd1..a4ce931 100644 --- a/src/Language/AST/Node/IntegerLiteral/IntegerFormat.php +++ b/src/Language/AST/Node/IntegerLiteral/IntegerFormat.php @@ -22,8 +22,6 @@ namespace PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; - enum IntegerFormat: string { case BINARY = 'BINARY'; diff --git a/src/Language/Parser/Module/ModuleCouldNotBeParsed.php b/src/Language/Parser/Module/ModuleCouldNotBeParsed.php index 4817cb5..88a6058 100644 --- a/src/Language/Parser/Module/ModuleCouldNotBeParsed.php +++ b/src/Language/Parser/Module/ModuleCouldNotBeParsed.php @@ -23,7 +23,6 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Module; use PackageFactory\ComponentEngine\Language\Parser\ParserException; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; final class ModuleCouldNotBeParsed extends ParserException { diff --git a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php index 57f88f8..b6a6a47 100644 --- a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php +++ b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php @@ -29,10 +29,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructNameNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\PropertyDeclaration\PropertyDeclarationParser; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token as TokenizerToken; 
final class StructDeclarationParser { diff --git a/src/Language/Parser/Tag/TagCouldNotBeParsed.php b/src/Language/Parser/Tag/TagCouldNotBeParsed.php index 9140875..46f39b4 100644 --- a/src/Language/Parser/Tag/TagCouldNotBeParsed.php +++ b/src/Language/Parser/Tag/TagCouldNotBeParsed.php @@ -25,11 +25,11 @@ use PackageFactory\ComponentEngine\Domain\TagName\TagName; use PackageFactory\ComponentEngine\Language\Parser\ParserException; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; final class TagCouldNotBeParsed extends ParserException { + protected const TITLE = 'Tag could not be parsed'; + public static function becauseOfClosingTagNameMismatch( TagName $expectedTagName, string $actualTagName, @@ -38,27 +38,11 @@ public static function becauseOfClosingTagNameMismatch( return new self( code: 1690976372, message: sprintf( - 'Tag could not be parsed, because the closing tag name "%s" did not match the opening tag name "%s".', + 'Closing tag name "%s" did not match the opening tag name "%s".', $actualTagName, $expectedTagName->value ), affectedRangeInSource: $affectedRangeInSource ); } - - public static function becauseOfUnexpectedToken( - TokenTypes $expectedTokenTypes, - Token $actualToken - ): self { - return new self( - code: 1691156112, - message: sprintf( - 'Tag could not be parsed because of unexpected token %s. ' - . 'Expected %s instead.', - $actualToken->toDebugString(), - $expectedTokenTypes->toDebugString() - ), - affectedRangeInSource: $actualToken->boundaries - ); - } } diff --git a/src/Parser/Source/Fragment.php b/src/Parser/Source/Fragment.php deleted file mode 100644 index 8f4bc73..0000000 --- a/src/Parser/Source/Fragment.php +++ /dev/null @@ -1,63 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Source; - -final class Fragment -{ - private function __construct( - public readonly string $value, - public readonly Position $start, - public readonly Position $end, - public readonly Source $source - ) { - } - - public static function create( - string $value, - Position $start, - Position $end, - Source $source - ): Fragment { - return new Fragment( - $value, - $start, - $end, - $source - ); - } - - public function append(Fragment $other): Fragment - { - return new Fragment( - $this->value . $other->value, - $this->start, - $other->end, - $this->source - ); - } - - public function __toString(): string - { - return $this->value; - } -} diff --git a/src/Parser/Source/Source.php b/src/Parser/Source/Source.php index ba143ca..dfc7bbe 100644 --- a/src/Parser/Source/Source.php +++ b/src/Parser/Source/Source.php @@ -22,10 +22,7 @@ namespace PackageFactory\ComponentEngine\Parser\Source; -/** - * @implements \IteratorAggregate - */ -final class Source implements \IteratorAggregate +final class Source { public function __construct( public readonly Path $path, @@ -51,32 +48,4 @@ public function equals(Source $other): bool { return $this->contents === $other->contents; } - - /** - * @return \Iterator - */ - public function getIterator(): \Iterator - { - $lineNumber = 0; - $columnNumber = 0; - $length = strlen($this->contents); - - for ($index = 0; $index < $length; $index++) { - $character = $this->contents[$index]; - - yield Fragment::create( - $character, - new Position($lineNumber, $columnNumber), - new Position($lineNumber, $columnNumber), - $this - ); - - if ($character === "\n") { - $lineNumber++; - $columnNumber = 0; - } else { - $columnNumber++; - } - } - } } diff --git a/src/Parser/Tokenizer/Buffer.php b/src/Parser/Tokenizer/Buffer.php deleted file mode 100644 index 145ff08..0000000 --- a/src/Parser/Tokenizer/Buffer.php +++ /dev/null @@ -1,62 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Source\Fragment; - -final class Buffer -{ - private function __construct( - private ?Fragment $fragment - ) { - } - - public static function empty(): self - { - return new self(null); - } - - public function append(Fragment $fragment): self - { - $this->fragment = $this->fragment?->append($fragment) ?? $fragment; - return $this; - } - - public function value(): string - { - return $this->fragment?->value ?? ''; - } - - public function isEmpty(): bool - { - return $this->fragment === null; - } - - public function flush(TokenType $tokenType): \Iterator - { - if ($this->fragment !== null) { - yield Token::fromFragment($tokenType, $this->fragment); - $this->fragment = null; - } - } -} diff --git a/src/Parser/Tokenizer/CharacterType.php b/src/Parser/Tokenizer/CharacterType.php deleted file mode 100644 index b74703b..0000000 --- a/src/Parser/Tokenizer/CharacterType.php +++ /dev/null @@ -1,68 +0,0 @@ -. - */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Tokenizer; - -enum CharacterType -{ - case BRACKET_OPEN; - case BRACKET_CLOSE; - case ANGLE_OPEN; - case ANGLE_CLOSE; - case STRING_DELIMITER; - case TEMPLATE_LITERAL_DELIMITER; - case COMMENT_DELIMITER; - case ESCAPE; - case FORWARD_SLASH; - case PERIOD; - case SYMBOL; - case DIGIT; - case SPACE; - case OTHER; - - public static function get(string $character): self - { - return match ($character) { - '(', '[', '{' => self::BRACKET_OPEN, - ')', ']', '}' => self::BRACKET_CLOSE, - '<' => self::ANGLE_OPEN, - '>' => self::ANGLE_CLOSE, - '\'', '"' => self::STRING_DELIMITER, - '`' => self::TEMPLATE_LITERAL_DELIMITER, - '#' => self::COMMENT_DELIMITER, - '\\' => self::ESCAPE, - '/' => self::FORWARD_SLASH, - '.' 
=> self::PERIOD, - '!', '%', '&', '|', '=', '?', ':', '-', ',', '+', '*', '$' => self::SYMBOL, - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => self::DIGIT, - default => match (true) { - ctype_space($character) => self::SPACE, - default => self::OTHER - } - }; - } - - public function is(string $character): bool - { - return self::get($character) === $this; - } -} diff --git a/src/Parser/Tokenizer/LookAhead.php b/src/Parser/Tokenizer/LookAhead.php deleted file mode 100644 index 4f7c325..0000000 --- a/src/Parser/Tokenizer/LookAhead.php +++ /dev/null @@ -1,80 +0,0 @@ -. - */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Tokenizer; - -/** - * @implements \IteratorAggregate - */ -final class LookAhead implements \IteratorAggregate -{ - /** - * @var Token[] - */ - private array $buffer = []; - - /** - * @param \Iterator $tokens - */ - private function __construct( - public \Iterator $tokens - ) { - } - - /** - * @param \Iterator $tokens - * @return self - */ - public static function fromTokens(\Iterator $tokens): self - { - return new self(tokens: $tokens); - } - - /** - * @return \Iterator - */ - public function getIterator(): \Iterator - { - foreach ($this->buffer as $token) { - yield $token; - } - - if (!Scanner::isEnd($this->tokens)) { - yield from $this->tokens; - } - } - - public function shift(): void - { - Scanner::assertValid($this->tokens); - $this->buffer[] = $this->tokens->current(); - Scanner::skipOne($this->tokens); - } - - public function type(): ?TokenType - { - if (Scanner::isEnd($this->tokens)) { - return null; - } - return Scanner::type($this->tokens); - } -} diff --git a/src/Parser/Tokenizer/Scanner.php b/src/Parser/Tokenizer/Scanner.php deleted file mode 100644 index 3e11f3e..0000000 --- a/src/Parser/Tokenizer/Scanner.php +++ /dev/null @@ -1,190 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Source\Path; - -final class Scanner -{ - /** - * @param \Iterator $tokens - * @return void - */ - public static function assertValid(\Iterator $tokens): void - { - if (!$tokens->valid()) { - throw new \Exception("@TODO: Unexpected end of file."); - } - } - - /** - * @param \Iterator $tokens - * @param TokenType ...$types - * @return void - */ - public static function assertType(\Iterator $tokens, TokenType ...$types): void - { - self::assertValid($tokens); - - $actualType = $tokens->current()->type; - foreach ($types as $expectedType) { - if ($actualType === $expectedType) { - return; - } - } - - throw new \Exception( - "@TODO: Unexpected token: " - . $actualType->value - . " at " - . ($tokens->current()->boundaries->start->lineNumber + 1) - . ":" - . ($tokens->current()->boundaries->start->columnNumber + 1) - ); - } - - /** - * @param \Iterator $tokens - * @param string ...$values - * @return void - */ - public static function assertValue(\Iterator $tokens, string ...$values): void - { - self::assertValid($tokens); - - $actualValue = $tokens->current()->value; - foreach ($values as $expectedValue) { - if ($actualValue === $expectedValue) { - return; - } - } - - throw new \Exception("@TODO: Unexpected value: " . 
$actualValue); - } - - /** - * @param \Iterator $tokens - * @return \Iterator - */ - public static function skipOne(\Iterator &$tokens): \Iterator - { - $tokens->next(); - return $tokens; - } - - /** - * @param \Iterator $tokens - * @return void - */ - public static function skipSpace(\Iterator $tokens): void - { - while ( - $tokens->valid() && match ($tokens->current()->type) { - TokenType::SPACE, - TokenType::END_OF_LINE => true, - default => false - } - ) { - $tokens->next(); - } - } - - /** - * @param \Iterator $tokens - * @return void - */ - public static function skipSpaceAndComments(\Iterator $tokens): void - { - while ( - $tokens->valid() && match ($tokens->current()->type) { - TokenType::SPACE, - TokenType::END_OF_LINE, - TokenType::COMMENT => true, - default => false - } - ) { - $tokens->next(); - } - } - - /** - * @param \Iterator $tokens - * @return string - */ - public static function value(\Iterator $tokens): string - { - self::assertValid($tokens); - return $tokens->current()->value; - } - - /** - * @param \Iterator $tokens - * @return TokenType - */ - public static function type(\Iterator $tokens): TokenType - { - self::assertValid($tokens); - return $tokens->current()->type; - } - - /** - * @param \Iterator $tokens - * @return Path - */ - public static function sourcePath(\Iterator $tokens): Path - { - self::assertValid($tokens); - return $tokens->current()->sourcePath; - } - - /** - * @param \Iterator $tokens - * @return bool - */ - public static function isEnd(\Iterator $tokens): bool - { - return !$tokens->valid(); - } - - /** - * @param \Iterator $tokens - */ - public static function debugPrint(\Iterator &$tokens): string - { - $tokens = (function(): \Generator { - throw new \Exception('Once debugged, $tokens is empty.'); - // @phpstan-ignore-next-line - yield; - })(); - - $tokensAsArray = []; - while ($tokens->valid()) { - $tokensAsArray[] = [ - "type" => $tokens->current()->type, - "value" => $tokens->current()->value - ]; - $tokens->next(); 
- } - return json_encode($tokensAsArray, JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR); - } -} diff --git a/src/Parser/Tokenizer/Token.php b/src/Parser/Tokenizer/Token.php deleted file mode 100644 index 66b5eb0..0000000 --- a/src/Parser/Tokenizer/Token.php +++ /dev/null @@ -1,73 +0,0 @@ -. - */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Source\Fragment; -use PackageFactory\ComponentEngine\Parser\Source\Path; - -final class Token -{ - public function __construct( - public readonly TokenType $type, - public readonly string $value, - public readonly Range $boundaries, - public readonly Path $sourcePath - ) { - } - - public static function fromFragment( - TokenType $type, - Fragment $fragment - ): Token { - return new Token( - $type, - $fragment->value, - Range::from($fragment->start, $fragment->end), - $fragment->source->path - ); - } - - public static function emptyFromDelimitingFragments( - TokenType $type, - Fragment $startFragment, - Fragment $endFragment - ): Token { - return new Token( - $type, - '', - Range::from($startFragment->start, $endFragment->end), - $startFragment->source->path - ); - } - - public function __toString(): string - { - return $this->value; - } - - public function toDebugString(): string - { - return sprintf('%s ("%s")', $this->type->value, $this->value); - } -} diff --git a/src/Parser/Tokenizer/TokenType.php b/src/Parser/Tokenizer/TokenType.php deleted file mode 100644 index 0c2b8fa..0000000 --- a/src/Parser/Tokenizer/TokenType.php +++ /dev/null @@ -1,226 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Source\Fragment; - -enum TokenType: string -{ - case COMMENT = 'COMMENT'; - - case KEYWORD_FROM = 'KEYWORD_FROM'; - case KEYWORD_IMPORT = 'KEYWORD_IMPORT'; - case KEYWORD_EXPORT = 'KEYWORD_EXPORT'; - case KEYWORD_ENUM = 'KEYWORD_ENUM'; - case KEYWORD_STRUCT = 'KEYWORD_STRUCT'; - case KEYWORD_COMPONENT = 'KEYWORD_COMPONENT'; - case KEYWORD_MATCH = 'KEYWORD_MATCH'; - case KEYWORD_DEFAULT = 'KEYWORD_DEFAULT'; - case KEYWORD_RETURN = 'KEYWORD_RETURN'; - case KEYWORD_TRUE = 'KEYWORD_TRUE'; - case KEYWORD_FALSE = 'KEYWORD_FALSE'; - case KEYWORD_NULL = 'KEYWORD_NULL'; - - case CONSTANT = 'CONSTANT'; - - case STRING = 'STRING'; - case STRING_QUOTED = 'STRING_QUOTED'; - - case NUMBER_BINARY = 'NUMBER_BINARY'; - case NUMBER_OCTAL = 'NUMBER_OCTAL'; - case NUMBER_DECIMAL = 'NUMBER_DECIMAL'; - case NUMBER_HEXADECIMAL = 'NUMBER_HEXADECIMAL'; - - case TEMPLATE_LITERAL_START = 'TEMPLATE_LITERAL_START'; - case TEMPLATE_LITERAL_END = 'TEMPLATE_LITERAL_END'; - - case OPERATOR_BOOLEAN_AND = 'OPERATOR_BOOLEAN_AND'; - case OPERATOR_BOOLEAN_OR = 'OPERATOR_BOOLEAN_OR'; - case OPERATOR_BOOLEAN_NOT = 'OPERATOR_BOOLEAN_NOT'; - - case COMPARATOR_EQUAL = 'COMPARATOR_EQUAL'; - case COMPARATOR_NOT_EQUAL = 'COMPARATOR_NOT_EQUAL'; - case COMPARATOR_GREATER_THAN = 'COMPARATOR_GREATER_THAN'; - case COMPARATOR_GREATER_THAN_OR_EQUAL = 'COMPARATOR_GREATER_THAN_OR_EQUAL'; - case COMPARATOR_LESS_THAN = 'COMPARATOR_LESS_THAN'; - case COMPARATOR_LESS_THAN_OR_EQUAL = 'COMPARATOR_LESS_THAN_OR_EQUAL'; - - case ARROW_SINGLE = 'ARROW_SINGLE'; - - case BRACKET_CURLY_OPEN = 'BRACKET_CURLY_OPEN'; - case BRACKET_CURLY_CLOSE = 'BRACKET_CURLY_CLOSE'; - case BRACKET_ROUND_OPEN = 'BRACKET_ROUND_OPEN'; - case BRACKET_ROUND_CLOSE = 'BRACKET_ROUND_CLOSE'; - case BRACKET_SQUARE_OPEN = 'BRACKET_SQUARE_OPEN'; - case BRACKET_SQUARE_CLOSE = 'BRACKET_SQUARE_CLOSE'; - - case 
TAG_START_OPENING = 'TAG_START_OPENING'; - case TAG_START_CLOSING = 'TAG_START_CLOSING'; - case TAG_SELF_CLOSE = 'TAG_SELF_CLOSE'; - case TAG_END = 'TAG_END'; - - case PERIOD = 'PERIOD'; - case COLON = 'COLON'; - case QUESTIONMARK = 'QUESTIONMARK'; - case COMMA = 'COMMA'; - case EQUALS = 'EQUALS'; - case SLASH_FORWARD = 'SLASH_FORWARD'; - case DOLLAR = 'DOLLAR'; - case PIPE = 'PIPE'; - - case OPTCHAIN = 'OPTCHAIN'; - case NULLISH_COALESCE = 'NULLISH_COALESCE'; - - case SPACE = 'SPACE'; - case END_OF_LINE = 'END_OF_LINE'; - - public static function fromBuffer(Buffer $buffer): TokenType - { - $value = $buffer->value(); - - return match (true) { - $value === 'from' => self::KEYWORD_FROM, - $value === 'import' => self::KEYWORD_IMPORT, - $value === 'export' => self::KEYWORD_EXPORT, - $value === 'enum' => self::KEYWORD_ENUM, - $value === 'struct' => self::KEYWORD_STRUCT, - $value === 'component' => self::KEYWORD_COMPONENT, - $value === 'match' => self::KEYWORD_MATCH, - $value === 'default' => self::KEYWORD_DEFAULT, - $value === 'return' => self::KEYWORD_RETURN, - $value === 'true' => self::KEYWORD_TRUE, - $value === 'false' => self::KEYWORD_FALSE, - $value === 'null' => self::KEYWORD_NULL, - - $value === '.' 
=> self::PERIOD, - - (bool) preg_match( - '/^0[bB][0-1]+$/', - $value - ) => self::NUMBER_BINARY, - (bool) preg_match( - '/^0o[0-7]+$/', - $value - ) => self::NUMBER_OCTAL, - $value !== '' && preg_match( - '/^([-+]?[0-9]+)$/', - $value - ) => self::NUMBER_DECIMAL, - (bool) preg_match( - '/^0x[0-9a-fA-F]+$/', - $value - ) => self::NUMBER_HEXADECIMAL, - default => self::STRING - }; - } - - public static function tryBracketOpenFromFragment(Fragment $fragment): ?self - { - return match ($fragment->value) { - '{' => self::BRACKET_CURLY_OPEN, - '(' => self::BRACKET_ROUND_OPEN, - '[' => self::BRACKET_SQUARE_OPEN, - default => null - }; - } - - public function closingBracket(): TokenType - { - return match ($this) { - self::BRACKET_CURLY_OPEN => self::BRACKET_CURLY_CLOSE, - self::BRACKET_ROUND_OPEN => self::BRACKET_ROUND_CLOSE, - self::BRACKET_SQUARE_OPEN => self::BRACKET_SQUARE_CLOSE, - default => throw new \Exception('@TODO: Not a bracket.') - }; - } - - public function matchesString(string $string): bool - { - return match ($this) { - self::BRACKET_CURLY_CLOSE => $string === '}', - self::BRACKET_ROUND_CLOSE => $string === ')', - self::BRACKET_SQUARE_CLOSE => $string === ']', - default => false - }; - } - - public function toDebugString(): string - { - return $this->value . match ($this) { - self::COMMENT => ' (e.g. "# ...")', - self::KEYWORD_FROM => ' ("from")', - self::KEYWORD_IMPORT => ' ("import")', - self::KEYWORD_EXPORT => ' ("export")', - self::KEYWORD_ENUM => ' ("enum")', - self::KEYWORD_STRUCT => ' ("struct")', - self::KEYWORD_COMPONENT => ' ("component")', - self::KEYWORD_MATCH => ' ("match")', - self::KEYWORD_DEFAULT => ' ("default")', - self::KEYWORD_RETURN => ' ("return")', - self::KEYWORD_TRUE => ' ("true")', - self::KEYWORD_FALSE => ' ("false")', - self::KEYWORD_NULL => ' ("null")', - self::CONSTANT => '', - self::STRING => '', - self::STRING_QUOTED => '', - self::NUMBER_BINARY => ' (e.g. "0b1001")', - self::NUMBER_OCTAL => ' (e.g. 
"0o644")', - self::NUMBER_DECIMAL => ' (e.g. "42")', - self::NUMBER_HEXADECIMAL => ' (e.g. "0xABC")', - self::TEMPLATE_LITERAL_START => ' ("`")', - self::TEMPLATE_LITERAL_END => ' ("`")', - self::OPERATOR_BOOLEAN_AND => ' ("&&")', - self::OPERATOR_BOOLEAN_OR => ' ("||")', - self::OPERATOR_BOOLEAN_NOT => ' ("!")', - self::COMPARATOR_EQUAL => ' ("===")', - self::COMPARATOR_NOT_EQUAL => ' ("!==")', - self::COMPARATOR_GREATER_THAN => ' (">")', - self::COMPARATOR_GREATER_THAN_OR_EQUAL => ' (">=")', - self::COMPARATOR_LESS_THAN => ' ("<")', - self::COMPARATOR_LESS_THAN_OR_EQUAL => ' ("<=")', - self::ARROW_SINGLE => ' ("->")', - self::BRACKET_CURLY_OPEN => ' ("{")', - self::BRACKET_CURLY_CLOSE => ' ("}")', - self::BRACKET_ROUND_OPEN => ' ("(")', - self::BRACKET_ROUND_CLOSE => ' (")")', - self::BRACKET_SQUARE_OPEN => ' ("[")', - self::BRACKET_SQUARE_CLOSE => ' ("]")', - self::TAG_START_OPENING => ' ("<")', - self::TAG_START_CLOSING => ' (" ' ("/>")', - self::TAG_END => ' (">")', - self::PERIOD => ' (".")', - self::COLON => ' (":")', - self::QUESTIONMARK => ' ("?")', - self::COMMA => ' (",")', - self::EQUALS => ' ("=")', - self::SLASH_FORWARD => ' ("/")', - self::DOLLAR => ' ("$")', - self::PIPE => ' ("|")', - self::OPTCHAIN => ' ("?.")', - self::NULLISH_COALESCE => ' ("??")', - self::SPACE => '', - self::END_OF_LINE => '' - }; - } -} diff --git a/src/Parser/Tokenizer/TokenTypes.php b/src/Parser/Tokenizer/TokenTypes.php deleted file mode 100644 index 135e1d7..0000000 --- a/src/Parser/Tokenizer/TokenTypes.php +++ /dev/null @@ -1,66 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Tokenizer; - -final class TokenTypes -{ - /** - * @var TokenType[] - */ - private readonly array $items; - - private function __construct(TokenType ...$items) - { - assert(count($items) > 0); - - $this->items = $items; - } - - public static function from(TokenType ...$items): self - { - $items = array_unique($items, SORT_REGULAR); - $items = array_values($items); - - return new self(...$items); - } - - public function contains(TokenType $needle): bool - { - return in_array($needle, $this->items); - } - - public function toDebugString(): string - { - if (count($this->items) === 1) { - return $this->items[0]->toDebugString(); - } - - $leadingItems = array_slice($this->items, 0, -1); - $trailingItem = array_slice($this->items, -1)[0]; - - return join(', ', array_map( - static fn (TokenType $tokenType) => $tokenType->toDebugString(), - $leadingItems - )) . ' or ' . $trailingItem->toDebugString(); - } -} diff --git a/src/Parser/Tokenizer/Tokenizer.php b/src/Parser/Tokenizer/Tokenizer.php deleted file mode 100644 index c670e20..0000000 --- a/src/Parser/Tokenizer/Tokenizer.php +++ /dev/null @@ -1,484 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Source\Fragment; -use PackageFactory\ComponentEngine\Parser\Source\Source; - -/** - * @implements \IteratorAggregate - */ -final class Tokenizer implements \IteratorAggregate -{ - private function __construct(private readonly Source $source) - { - } - - public static function fromSource(Source $source): Tokenizer - { - return new Tokenizer(source: $source); - } - - /** - * @return \Iterator - */ - public function getIterator(): \Iterator - { - $fragments = $this->source->getIterator(); - while ($fragments->valid()) { - yield from self::block($fragments); - } - } - - /** - * @param \Iterator $fragments - * @return \Iterator - */ - private static function block(\Iterator $fragments): \Iterator - { - if (!$fragments->valid()) { - return; - } - - $bracket = TokenType::tryBracketOpenFromFragment($fragments->current()); - $buffer = Buffer::empty(); - - if ($bracket) { - yield from $buffer->append($fragments->current())->flush($bracket); - $fragments->next(); - } - - while ($fragments->valid()) { - /** @var Fragment $fragment */ - $fragment = $fragments->current(); - - if ($bracket) { - $closingBracket = $bracket->closingBracket(); - - if ($closingBracket->matchesString($fragment->value)) { - yield from self::flushRemainder($buffer); - yield from $buffer->append($fragments->current())->flush($closingBracket); - $fragments->next(); - return; - } - } - - $delegate = match (CharacterType::get($fragment->value)) { - CharacterType::COMMENT_DELIMITER => self::comment($fragments), - CharacterType::STRING_DELIMITER => self::string($fragments), - CharacterType::TEMPLATE_LITERAL_DELIMITER => self::templateLiteral($fragments), - CharacterType::BRACKET_OPEN => self::block($fragments), - CharacterType::ANGLE_OPEN => self::angle($fragments), - CharacterType::PERIOD => self::period($fragments), - CharacterType::ANGLE_CLOSE, - 
CharacterType::FORWARD_SLASH, - CharacterType::SYMBOL => self::symbol($fragments), - CharacterType::SPACE => self::space($fragments), - default => null - }; - - if ($delegate) { - yield from self::flushRemainder($buffer); - yield from $delegate; - } else { - $buffer->append($fragment); - $fragments->next(); - } - } - - yield from self::flushRemainder($buffer); - } - - /** - * @param Buffer $buffer - * @return \Iterator - */ - private static function flushRemainder(Buffer $buffer): \Iterator - { - yield from $buffer->flush(TokenType::fromBuffer($buffer)); - } - - /** - * @param \Iterator $fragments - * @return \Iterator - */ - private static function string(\Iterator $fragments): \Iterator - { - $delimiter = $fragments->current(); - $fragments->next(); - - $buffer = Buffer::empty(); - - while ($fragments->valid()) { - switch ($fragments->current()->value) { - case $delimiter->value: - if ($buffer->isEmpty()) { - yield Token::emptyFromDelimitingFragments( - TokenType::STRING_QUOTED, - $delimiter, - $fragments->current() - ); - } else { - yield from $buffer->flush(TokenType::STRING_QUOTED); - } - $fragments->next(); - return; - - case '\\': - $buffer->append($fragments->current()); - $fragments->next(); - - if (!$fragments->valid()) { - throw new \Exception("@TODO: Unexpected end of input"); - } - - $buffer->append($fragments->current()); - $fragments->next(); - break; - - default: - $buffer->append($fragments->current()); - $fragments->next(); - break; - } - } - } - - /** - * @param \Iterator $fragments - * @return \Iterator - */ - public static function templateLiteral(\Iterator $fragments): \Iterator - { - $buffer = Buffer::empty(); - $buffer->append($fragments->current()); - - yield from $buffer->flush(TokenType::TEMPLATE_LITERAL_START); - - $fragments->next(); - - while ($fragments->valid()) { - - switch ($fragments->current()->value) { - case '`': - yield from $buffer->flush(TokenType::STRING_QUOTED); - $buffer->append($fragments->current()); - yield from 
$buffer->flush(TokenType::TEMPLATE_LITERAL_END); - $fragments->next(); - return; - - case '$': - $dollarSignBuffer = Buffer::empty()->append($fragments->current()); - $fragments->next(); - - if (!$fragments->valid()) { - throw new \Exception("@TODO: Unexpected end of input"); - } - - $nextFragment = $fragments->current(); - - if ($nextFragment->value === '{') { - yield from $buffer->flush(TokenType::STRING_QUOTED); - yield from $dollarSignBuffer->flush(TokenType::DOLLAR); - yield from self::block($fragments); - } - break; - - case '\\': - $buffer->append($fragments->current()); - $fragments->next(); - - if (!$fragments->valid()) { - throw new \Exception("@TODO: Unexpected end of input"); - } - - $buffer->append($fragments->current()); - $fragments->next(); - break; - - default: - $buffer->append($fragments->current()); - $fragments->next(); - break; - } - } - } - - /** - * @param \Iterator $fragments - * @return \Iterator - */ - public static function period(\Iterator $fragments): \Iterator - { - $buffer = Buffer::empty()->append($fragments->current()); - $fragments->next(); - - yield from $buffer->flush(TokenType::PERIOD); - } - - /** - * @param \Iterator $fragments - */ - public static function symbol(\Iterator $fragments, ?Buffer $buffer = null): \Iterator - { - $buffer = $buffer ?? Buffer::empty(); - $capture = true; - - while ($capture && $fragments->valid()) { - $fragment = $fragments->current(); - - if ($buffer->value() === '!' 
&& $fragment->value === '!') { - // chained `!` must be kept as individual fragments/tokens - break; - } - - $capture = match (CharacterType::get($fragment->value)) { - CharacterType::ANGLE_CLOSE, - CharacterType::FORWARD_SLASH, - CharacterType::PERIOD, - CharacterType::SYMBOL => (bool) $buffer->append($fragment), - default => false - }; - - if ($capture) $fragments->next(); - } - - yield from match ($buffer->value()) { - '&&' => $buffer->flush(TokenType::OPERATOR_BOOLEAN_AND), - '||' => $buffer->flush(TokenType::OPERATOR_BOOLEAN_OR), - '!' => $buffer->flush(TokenType::OPERATOR_BOOLEAN_NOT), - '>' => $buffer->flush(TokenType::COMPARATOR_GREATER_THAN), - '>=' => $buffer->flush(TokenType::COMPARATOR_GREATER_THAN_OR_EQUAL), - '<' => $buffer->flush(TokenType::COMPARATOR_LESS_THAN), - '<=' => $buffer->flush(TokenType::COMPARATOR_LESS_THAN_OR_EQUAL), - '===' => $buffer->flush(TokenType::COMPARATOR_EQUAL), - '!==' => $buffer->flush(TokenType::COMPARATOR_NOT_EQUAL), - '->' => $buffer->flush(TokenType::ARROW_SINGLE), - ':' => $buffer->flush(TokenType::COLON), - '?.' => $buffer->flush(TokenType::OPTCHAIN), - '.' => $buffer->flush(TokenType::PERIOD), - ',' => $buffer->flush(TokenType::COMMA), - '=' => $buffer->flush(TokenType::EQUALS), - '?' 
=> $buffer->flush(TokenType::QUESTIONMARK), - '$' => $buffer->flush(TokenType::DOLLAR), - '|' => $buffer->flush(TokenType::PIPE), - default => self::flushRemainder($buffer) - }; - } - - /** - * @param \Iterator $fragments - * @return \Iterator - */ - public static function angle(\Iterator $fragments): \Iterator - { - $buffer = Buffer::empty(); - - /** @var Fragment $fragment */ - $fragment = $fragments->current(); - $buffer->append($fragment); - - $fragments->next(); - if ($fragments->valid()) { - /** @var Fragment $fragment */ - $fragment = $fragments->current(); - yield from match (CharacterType::get($fragment->value)) { - CharacterType::SYMBOL => self::symbol($fragments, $buffer), - CharacterType::SPACE => $buffer->flush(TokenType::COMPARATOR_LESS_THAN), - default => self::tag($fragments, $buffer) - }; - } - } - - /** - * @param \Iterator $fragments - * @param null|Buffer $buffer - * @return \Iterator - */ - public static function tag(\Iterator $fragments, ?Buffer $buffer = null): \Iterator - { - $buffer = $buffer ?? 
Buffer::empty(); - $isClosing = false; - - while ($fragments->valid()) { - /** @var Fragment $fragment */ - $fragment = $fragments->current(); - if ($buffer->value() === '<') { - if ($fragment->value === '/') { - yield from $buffer->append($fragment)->flush(TokenType::TAG_START_CLOSING); - $fragments->next(); - $isClosing = true; - continue; - } else { - yield from $buffer->flush(TokenType::TAG_START_OPENING); - } - } - - switch (true) { - case $fragment->value === '=': - yield from $buffer->flush(TokenType::STRING); - yield from $buffer->append($fragment)->flush(TokenType::EQUALS); - $fragments->next(); - break; - case $fragment->value === '{': - yield from $buffer->flush(TokenType::STRING); - yield from self::block($fragments); - break; - case $fragment->value === '"': - yield from $buffer->flush(TokenType::STRING); - yield from self::string($fragments); - break; - case $fragment->value === '/': - yield from $buffer->flush(TokenType::STRING); - $buffer->append($fragment); - $fragments->next(); - if (!$fragments->valid()) { - throw new \Exception("@TODO: Unexpected end of input"); - } - $nextFragment = $fragments->current(); - if ($nextFragment->value === '>') { - yield from $buffer->append($nextFragment)->flush(TokenType::TAG_SELF_CLOSE); - $fragments->next(); - } else { - throw new \Exception("@TODO: Illegal Character"); - } - - - return; - case $fragment->value === '>': - yield from $buffer->flush(TokenType::STRING); - yield from $buffer->append($fragment)->flush(TokenType::TAG_END); - $fragments->next(); - - if ($isClosing) { - return; - } else { - $buffer = (yield from self::tagContent($fragments)) ?? 
Buffer::empty(); - } - break; - case ctype_space($fragment->value): - yield from $buffer->flush(TokenType::STRING); - yield from self::space($fragments); - break; - default: - $buffer->append($fragment); - $fragments->next(); - break; - } - } - - yield from $buffer->flush(TokenType::STRING); - } - - /** - * @param \Iterator $fragments - * @return \Iterator - */ - public static function tagContent(\Iterator $fragments): \Iterator - { - $buffer = Buffer::empty(); - while ($fragments->valid()) { - /** @var Fragment $fragment */ - $fragment = $fragments->current(); - switch (true) { - case $fragment->value === '{': - yield from $buffer->flush(TokenType::STRING); - yield from self::block($fragments); - break; - case $fragment->value === '<': - $fragments->next(); - if (!$fragments->valid()) { - throw new \Exception("@TODO: Unexpected end of input"); - } - if ($fragments->current()->value === '/') { - yield from $buffer->flush(TokenType::STRING); - return Buffer::empty()->append($fragment); - } else if (!ctype_space($fragments->current()->value)) { - yield from self::tag($fragments, Buffer::empty()->append($fragment)); - } else { - $buffer->append($fragment); - } - case ctype_space($fragment->value): - yield from $buffer->flush(TokenType::STRING); - yield from self::space($fragments); - break; - default: - $buffer->append($fragment); - $fragments->next(); - break; - } - } - - yield from $buffer->flush(TokenType::STRING); - } - - /** - * @param \Iterator $fragments - * @return \Iterator - */ - public static function space(\Iterator $fragments): \Iterator - { - $buffer = Buffer::empty(); - - while ($fragments->valid()) { - /** @var Fragment $fragment */ - $fragment = $fragments->current(); - - if ($fragment->value === PHP_EOL) { - yield from $buffer->flush(TokenType::SPACE); - yield from $buffer->append($fragment)->flush(TokenType::END_OF_LINE); - } else if (ctype_space($fragment->value)) { - $buffer->append($fragment); - } else { - break; - } - - $fragments->next(); - } - 
- yield from $buffer->flush(TokenType::SPACE); - } - - /** - * @param \Iterator $fragments - * @return \Iterator - */ - public static function comment(\Iterator $fragments): \Iterator - { - $buffer = Buffer::empty(); - - while ($fragments->valid()) { - /** @var Fragment $fragment */ - $fragment = $fragments->current(); - - if ($fragment->value === PHP_EOL) { - break; - } - - $buffer->append($fragment); - $fragments->next(); - } - - yield from $buffer->flush(TokenType::COMMENT); - } -} diff --git a/test/Integration/PhpTranspilerIntegrationTest.php b/test/Integration/PhpTranspilerIntegrationTest.php index a025eb1..5617c73 100644 --- a/test/Integration/PhpTranspilerIntegrationTest.php +++ b/test/Integration/PhpTranspilerIntegrationTest.php @@ -27,7 +27,6 @@ use PackageFactory\ComponentEngine\Module\Loader\ModuleFile\ModuleFileLoader; use PackageFactory\ComponentEngine\Parser\Source\Path; use PackageFactory\ComponentEngine\Parser\Source\Source; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Tokenizer; use PackageFactory\ComponentEngine\Target\Php\Transpiler\Module\ModuleTranspiler; use PackageFactory\ComponentEngine\Test\Unit\Target\Php\Transpiler\Module\ModuleTestStrategy; use PackageFactory\ComponentEngine\TypeSystem\Scope\GlobalScope\GlobalScope; diff --git a/test/Unit/Language/Parser/ParserTestCase.php b/test/Unit/Language/Parser/ParserTestCase.php index 1512025..c473a16 100644 --- a/test/Unit/Language/Parser/ParserTestCase.php +++ b/test/Unit/Language/Parser/ParserTestCase.php @@ -25,25 +25,10 @@ use PackageFactory\ComponentEngine\Language\Parser\ParserException; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Source\Source; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Tokenizer; use PHPUnit\Framework\TestCase; abstract class ParserTestCase extends TestCase { - /** - * @param string 
$sourceAsString - * @return \Iterator - */ - protected function createTokenIterator(string $sourceAsString): \Iterator - { - $source = Source::fromString($sourceAsString); - $tokenizer = Tokenizer::fromSource($source); - - return $tokenizer->getIterator(); - } - /** * @param array{int,int} $startAsArray * @param array{int,int} $endAsArray diff --git a/test/Unit/Parser/Tokenizer/Fixtures.php b/test/Unit/Parser/Tokenizer/Fixtures.php deleted file mode 100644 index cfe7a57..0000000 --- a/test/Unit/Parser/Tokenizer/Fixtures.php +++ /dev/null @@ -1,42 +0,0 @@ -. - */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Test\Unit\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Source\Source; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Tokenizer; - -final class Fixtures -{ - /** - * @param string $sourceAsString - * @return \Iterator - */ - public static function tokens(string $sourceAsString): \Iterator - { - $source = Source::fromString($sourceAsString); - $tokenizer = Tokenizer::fromSource($source); - - return $tokenizer->getIterator(); - } -} diff --git a/test/Unit/Parser/Tokenizer/TokenTest.php b/test/Unit/Parser/Tokenizer/TokenTest.php deleted file mode 100644 index 44159ce..0000000 --- a/test/Unit/Parser/Tokenizer/TokenTest.php +++ /dev/null @@ -1,55 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Test\Unit\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Source\Path; -use PackageFactory\ComponentEngine\Parser\Source\Position; -use PackageFactory\ComponentEngine\Parser\Source\Range; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Token; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; -use PHPUnit\Framework\TestCase; - -final class TokenTest extends TestCase -{ - /** - * @test - */ - public function providesDebugString(): void - { - $token = new Token( - type: TokenType::COMMENT, - value: '# This is a comment', - boundaries: Range::from( - new Position(0, 0), - new Position(0, 0) - ), - sourcePath: Path::createMemory() - ); - - $this->assertEquals( - 'COMMENT ("# This is a comment")', - $token->toDebugString() - ); - } -} diff --git a/test/Unit/Parser/Tokenizer/TokenTypesTest.php b/test/Unit/Parser/Tokenizer/TokenTypesTest.php deleted file mode 100644 index 5afe1e4..0000000 --- a/test/Unit/Parser/Tokenizer/TokenTypesTest.php +++ /dev/null @@ -1,110 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Test\Unit\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenTypes; -use PHPUnit\Framework\TestCase; - -final class TokenTypesTest extends TestCase -{ - /** - * @test - */ - public function providesDebugStringForSingleItem(): void - { - $tokenTypes = TokenTypes::from(TokenType::COLON); - - $this->assertEquals( - 'COLON (":")', - $tokenTypes->toDebugString() - ); - } - - /** - * @test - */ - public function providesDebugStringForTwoItems(): void - { - $tokenTypes = TokenTypes::from(TokenType::PERIOD, TokenType::COMMA); - - $this->assertEquals( - 'PERIOD (".") or COMMA (",")', - $tokenTypes->toDebugString() - ); - } - - /** - * @test - */ - public function providesDebugStringForThreeOrMoreItems(): void - { - $tokenTypes = TokenTypes::from( - TokenType::PERIOD, - TokenType::COMMA, - TokenType::COLON, - TokenType::DOLLAR - ); - - $this->assertEquals( - 'PERIOD ("."), COMMA (","), COLON (":") or DOLLAR ("$")', - $tokenTypes->toDebugString() - ); - } - - /** - * @test - */ - public function containsReturnsTrueIfCollectionContainsGivenTokenType(): void - { - $tokenTypes = TokenTypes::from( - TokenType::PERIOD, - TokenType::COMMA, - TokenType::COLON, - TokenType::DOLLAR - ); - - $this->assertTrue($tokenTypes->contains(TokenType::PERIOD)); - $this->assertTrue($tokenTypes->contains(TokenType::COMMA)); - $this->assertTrue($tokenTypes->contains(TokenType::COLON)); - $this->assertTrue($tokenTypes->contains(TokenType::DOLLAR)); - } - - /** - * @test - */ - public function containsReturnsFalseIfCollectionDoesNotContainGivenTokenType(): void - { - $tokenTypes = TokenTypes::from( - TokenType::PERIOD, - TokenType::COMMA, - TokenType::COLON, - TokenType::DOLLAR - ); - - $this->assertFalse($tokenTypes->contains(TokenType::SLASH_FORWARD)); - $this->assertFalse($tokenTypes->contains(TokenType::COMMENT)); - 
$this->assertFalse($tokenTypes->contains(TokenType::STRING)); - $this->assertFalse($tokenTypes->contains(TokenType::EQUALS)); - } -} diff --git a/test/Unit/Parser/Tokenizer/TokenizerTest.php b/test/Unit/Parser/Tokenizer/TokenizerTest.php deleted file mode 100644 index 27ab660..0000000 --- a/test/Unit/Parser/Tokenizer/TokenizerTest.php +++ /dev/null @@ -1,168 +0,0 @@ -. - */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Test\Unit\Parser\Tokenizer; - -use PackageFactory\ComponentEngine\Parser\Source\Source; -use PackageFactory\ComponentEngine\Parser\Tokenizer\Tokenizer; -use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType; -use PHPUnit\Framework\TestCase; - -final class TokenizerTest extends TestCase -{ - /** - * @test - */ - public function tokenizesEmptySourceToEmptyIterator(): void - { - $source = Source::fromString(''); - $tokenizer = Tokenizer::fromSource($source); - $iterator = $tokenizer->getIterator(); - - $this->assertFalse($iterator->valid()); - } - - /** - * @test - */ - public function tokenizesOpeningTag(): void - { - $source = Source::fromString(''); - $tokenizer = Tokenizer::fromSource($source); - $tokens = \iterator_to_array($tokenizer->getIterator(), false); - - $this->assertEquals(TokenType::TAG_START_OPENING, $tokens[0]->type); - $this->assertEquals(TokenType::STRING, $tokens[1]->type); - $this->assertEquals(TokenType::TAG_END, $tokens[2]->type); - } - - /** - * @test - */ - public function tokenizesClosingTag(): void - { - $source = Source::fromString(''); - $tokenizer = Tokenizer::fromSource($source); - $tokens = \iterator_to_array($tokenizer->getIterator(), false); - - $this->assertEquals(TokenType::TAG_START_CLOSING, $tokens[0]->type); - $this->assertEquals(TokenType::STRING, $tokens[1]->type); - $this->assertEquals(TokenType::TAG_END, $tokens[2]->type); - } - - /** - * @test - */ - public function tokenizesMultipleBracketedStatements(): void - { - $source = Source::fromString('(a ? b : c) ? (d ? 
e : f) : (g ? h : i)'); - $tokenizer = Tokenizer::fromSource($source); - $tokens = \iterator_to_array($tokenizer->getIterator(), false); - - $this->assertEquals(TokenType::BRACKET_ROUND_OPEN, $tokens[0]->type); - - $this->assertEquals(TokenType::STRING, $tokens[1]->type); - $this->assertEquals('a', $tokens[1]->value); - - $this->assertEquals(TokenType::SPACE, $tokens[2]->type); - - $this->assertEquals(TokenType::QUESTIONMARK, $tokens[3]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[4]->type); - - $this->assertEquals(TokenType::STRING, $tokens[5]->type); - $this->assertEquals('b', $tokens[5]->value); - - $this->assertEquals(TokenType::SPACE, $tokens[6]->type); - - $this->assertEquals(TokenType::COLON, $tokens[7]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[8]->type); - - $this->assertEquals(TokenType::STRING, $tokens[9]->type); - $this->assertEquals('c', $tokens[9]->value); - - $this->assertEquals(TokenType::BRACKET_ROUND_CLOSE, $tokens[10]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[11]->type); - - $this->assertEquals(TokenType::QUESTIONMARK, $tokens[12]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[13]->type); - - $this->assertEquals(TokenType::BRACKET_ROUND_OPEN, $tokens[14]->type); - - $this->assertEquals(TokenType::STRING, $tokens[15]->type); - $this->assertEquals('d', $tokens[15]->value); - - $this->assertEquals(TokenType::SPACE, $tokens[16]->type); - - $this->assertEquals(TokenType::QUESTIONMARK, $tokens[17]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[18]->type); - - $this->assertEquals(TokenType::STRING, $tokens[19]->type); - $this->assertEquals('e', $tokens[19]->value); - - $this->assertEquals(TokenType::SPACE, $tokens[20]->type); - - $this->assertEquals(TokenType::COLON, $tokens[21]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[22]->type); - - $this->assertEquals(TokenType::STRING, $tokens[23]->type); - $this->assertEquals('f', $tokens[23]->value); - - 
$this->assertEquals(TokenType::BRACKET_ROUND_CLOSE, $tokens[24]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[25]->type); - - $this->assertEquals(TokenType::COLON, $tokens[26]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[27]->type); - - $this->assertEquals(TokenType::BRACKET_ROUND_OPEN, $tokens[28]->type); - - $this->assertEquals(TokenType::STRING, $tokens[29]->type); - $this->assertEquals('g', $tokens[29]->value); - - $this->assertEquals(TokenType::SPACE, $tokens[30]->type); - - $this->assertEquals(TokenType::QUESTIONMARK, $tokens[31]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[32]->type); - - $this->assertEquals(TokenType::STRING, $tokens[33]->type); - $this->assertEquals('h', $tokens[33]->value); - - $this->assertEquals(TokenType::SPACE, $tokens[34]->type); - - $this->assertEquals(TokenType::COLON, $tokens[35]->type); - - $this->assertEquals(TokenType::SPACE, $tokens[36]->type); - - $this->assertEquals(TokenType::STRING, $tokens[37]->type); - $this->assertEquals('i', $tokens[37]->value); - - $this->assertEquals(TokenType::BRACKET_ROUND_CLOSE, $tokens[38]->type); - } -} From 158fbc13ff276ade15a71bf67a34f2bd64782638 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 11 Aug 2023 15:36:42 +0200 Subject: [PATCH 06/19] TASK: Expose buffer of lexer and remove Token class --- src/Language/Lexer/Lexer.php | 18 +- src/Language/Lexer/LexerException.php | 16 - src/Language/Lexer/Token/Token.php | 35 -- .../BooleanLiteral/BooleanLiteralParser.php | 7 +- .../ComponentDeclarationParser.php | 10 +- .../EnumDeclaration/EnumDeclarationParser.php | 15 +- .../Expression/ExpressionCouldNotBeParsed.php | 20 +- .../Parser/Expression/ExpressionParser.php | 27 +- src/Language/Parser/Import/ImportParser.php | 27 +- .../IntegerLiteral/IntegerLiteralParser.php | 15 +- .../Parser/NullLiteral/NullLiteralParser.php | 3 +- .../PropertyDeclarationParser.php | 26 +- .../StringLiteral/StringLiteralParser.php | 4 +- .../StructDeclarationParser.php | 10 
+- src/Language/Parser/Tag/TagParser.php | 24 +- .../TemplateLiteral/TemplateLiteralParser.php | 5 +- src/Language/Parser/Text/TextParser.php | 2 +- .../TypeReference/TypeReferenceParser.php | 5 +- .../ValueReference/ValueReferenceParser.php | 5 +- src/Language/Util/DebugHelper.php | 6 - src/Parser/Source/Position.php | 9 +- test/Unit/Language/Lexer/LexerTest.php | 535 +++++++++--------- 22 files changed, 370 insertions(+), 454 deletions(-) delete mode 100644 src/Language/Lexer/Token/Token.php diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index 3a83ff1..5e7651f 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -42,7 +42,6 @@ final class Lexer private int $offset = 0; private string $buffer = ''; private ?TokenType $tokenTypeUnderCursor = null; - private ?Token $tokenUnderCursor = null; public function __construct(string $source) { @@ -67,13 +66,9 @@ public function getTokenTypeUnderCursor(): TokenType return $this->tokenTypeUnderCursor; } - public function getTokenUnderCursor(): Token + public function getBuffer(): string { - return $this->tokenUnderCursor ??= new Token( - rangeInSource: Range::from($this->startPosition, $this->getEndPosition()), - type: $this->getTokenTypeUnderCursor(), - value: $this->buffer - ); + return $this->buffer; } public function isEnd(): bool @@ -93,16 +88,19 @@ public function assertIsEnd(): void public function getStartPosition(): Position { - return $this->startPosition; } public function getEndPosition(): Position { - return $this->characterStream->getPreviousPosition(); } + public function getCursorRange(): Range + { + return $this->getStartPosition()->toRange($this->getEndPosition()); + } + public function read(TokenType $tokenType): void { @@ -296,7 +294,6 @@ private function skipAnyOf(TokenTypes $tokenTypes): void private function extract(TokenType $tokenType): ?TokenType { $this->startPosition = $this->characterStream->getCurrentPosition(); - $this->tokenUnderCursor = 
null; $this->offset = 0; $this->buffer = ''; @@ -321,7 +318,6 @@ private function extract(TokenType $tokenType): ?TokenType private function extractOneOf(TokenTypes $tokenTypes): ?TokenType { $this->startPosition = $this->characterStream->getCurrentPosition(); - $this->tokenUnderCursor = null; $this->offset = 0; $this->buffer = ''; diff --git a/src/Language/Lexer/LexerException.php b/src/Language/Lexer/LexerException.php index 99a0bbe..6b376d0 100644 --- a/src/Language/Lexer/LexerException.php +++ b/src/Language/Lexer/LexerException.php @@ -22,7 +22,6 @@ namespace PackageFactory\ComponentEngine\Language\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Util\DebugHelper; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -74,21 +73,6 @@ public static function becauseOfUnexpectedCharacterSequence( ); } - public static function becauseOfUnexpectedToken( - TokenTypes $expectedTokenTypes, - Token $actualToken - ): self { - return new self( - code: 1691575769, - message: sprintf( - 'Unexpected token "%s" was encountered. Expected %s instead.', - DebugHelper::describeToken($actualToken), - DebugHelper::describeTokenTypes($expectedTokenTypes) - ), - affectedRangeInSource: $actualToken->rangeInSource - ); - } - public static function becauseOfUnexpectedExceedingSource( Range $affectedRangeInSource, string $exceedingCharacter diff --git a/src/Language/Lexer/Token/Token.php b/src/Language/Lexer/Token/Token.php deleted file mode 100644 index 97ea214..0000000 --- a/src/Language/Lexer/Token/Token.php +++ /dev/null @@ -1,35 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Language\Lexer\Token; - -use PackageFactory\ComponentEngine\Parser\Source\Range; - -final class Token -{ - public function __construct( - public readonly Range $rangeInSource, - public readonly TokenType $type, - public readonly string $value - ) { - } -} diff --git a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php index de40bbf..b2ac8ad 100644 --- a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php +++ b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php @@ -46,12 +46,9 @@ public function parse(Lexer $lexer): BooleanLiteralNode { $lexer->readOneOf(self::$TOKEN_TYPES_BOOLEAN_KEYWORDS); - $token = $lexer->getTokenUnderCursor(); - $value = $token->type === TokenType::KEYWORD_TRUE; - return new BooleanLiteralNode( - rangeInSource: $token->rangeInSource, - value: $value + rangeInSource: $lexer->getCursorRange(), + value: $lexer->getTokenTypeUnderCursor() === TokenType::KEYWORD_TRUE ); } } diff --git a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php index ccdd11e..2d0d909 100644 --- a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php +++ b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php @@ -76,14 +76,14 @@ public function parse(Lexer $lexer): ComponentDeclarationNode private function parseName(Lexer $lexer): ComponentNameNode { $lexer->read(TokenType::WORD); - $componentNameToken = $lexer->getTokenUnderCursor(); + $componentNameNode = new ComponentNameNode( + rangeInSource: $lexer->getCursorRange(), + value: ComponentName::from($lexer->getBuffer()) + ); $lexer->skipSpace(); - return new ComponentNameNode( - rangeInSource: $componentNameToken->rangeInSource, - value: ComponentName::from($componentNameToken->value) - ); + return $componentNameNode; } private 
function parseProps(Lexer $lexer): PropertyDeclarationNodes diff --git a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php index 2783779..57ca71f 100644 --- a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php +++ b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php @@ -81,13 +81,13 @@ public function parse(Lexer $lexer): EnumDeclarationNode private function parseEnumName(Lexer $lexer): EnumNameNode { $lexer->read(TokenType::WORD); - $enumKeyNameToken = $lexer->getTokenUnderCursor(); + $enumNameNode = new EnumNameNode( + rangeInSource: $lexer->getCursorRange(), + value: EnumName::from($lexer->getBuffer()) + ); $lexer->skipSpace(); - return new EnumNameNode( - rangeInSource: $enumKeyNameToken->rangeInSource, - value: EnumName::from($enumKeyNameToken->value) - ); + return $enumNameNode; } private function parseEnumMemberDeclarations(Lexer $lexer): EnumMemberDeclarationNodes @@ -126,11 +126,10 @@ private function parseEnumMemberDeclaration(Lexer $lexer): EnumMemberDeclaration private function parseEnumMemberName(Lexer $lexer): EnumMemberNameNode { $lexer->read(TokenType::WORD); - $enumMemberNameToken = $lexer->getTokenUnderCursor(); return new EnumMemberNameNode( - rangeInSource: $enumMemberNameToken->rangeInSource, - value: EnumMemberName::from($enumMemberNameToken->value) + rangeInSource: $lexer->getCursorRange(), + value: EnumMemberName::from($lexer->getBuffer()) ); } diff --git a/src/Language/Parser/Expression/ExpressionCouldNotBeParsed.php b/src/Language/Parser/Expression/ExpressionCouldNotBeParsed.php index 47cb22b..f9fc4dd 100644 --- a/src/Language/Parser/Expression/ExpressionCouldNotBeParsed.php +++ b/src/Language/Parser/Expression/ExpressionCouldNotBeParsed.php @@ -22,27 +22,9 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Expression; -use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; -use 
PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Parser\ParserException; -use PackageFactory\ComponentEngine\Language\Util\DebugHelper; final class ExpressionCouldNotBeParsed extends ParserException { - public static function becauseOfUnexpectedToken( - TokenTypes $expectedTokenTypes, - Token $actualToken - ): self { - return new self( - code: 1691063089, - message: sprintf( - 'Expression could not be parsed because of unexpected token %s. ' - . 'Expected %s instead.', - DebugHelper::describeToken($actualToken), - DebugHelper::describeTokenTypes($expectedTokenTypes) - ), - affectedRangeInSource: $actualToken->rangeInSource - ); - } + protected const TITLE = 'Expression could not be parsed'; } diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index ca741aa..abf6e57 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -340,24 +340,21 @@ private function parseAcccess(Lexer $lexer, ExpressionNode $parent): ExpressionN $type = $this->parseAccessType($lexer); $lexer->read(TokenType::WORD); - $keyToken = $lexer->getTokenUnderCursor(); - - $rangeInSource = Range::from( - $parent->rangeInSource->start, - $keyToken->rangeInSource->end + $accessNode = new AccessNode( + rangeInSource: $parent->rangeInSource->start->toRange( + $lexer->getEndPosition() + ), + parent: $parent, + type: $type, + key: new AccessKeyNode( + rangeInSource: $lexer->getCursorRange(), + value: PropertyName::from($lexer->getBuffer()) + ) ); $parent = new ExpressionNode( - rangeInSource: $rangeInSource, - root: new AccessNode( - rangeInSource: $rangeInSource, - parent: $parent, - type: $type, - key: new AccessKeyNode( - rangeInSource: $keyToken->rangeInSource, - value: PropertyName::from($keyToken->value) - ) - ) + rangeInSource: 
$accessNode->rangeInSource, + root: $accessNode ); $lexer->skipSpaceAndComments(); diff --git a/src/Language/Parser/Import/ImportParser.php b/src/Language/Parser/Import/ImportParser.php index 92381b6..1a09067 100644 --- a/src/Language/Parser/Import/ImportParser.php +++ b/src/Language/Parser/Import/ImportParser.php @@ -95,10 +95,13 @@ private function parseNames(Lexer $lexer): ImportedNameNodes $start = $lexer->getStartPosition(); $lexer->skipSpaceAndComments(); - $nameTokens = []; + $nameNodes = []; while (!$lexer->peek(TokenType::BRACKET_CURLY_CLOSE)) { $lexer->read(TokenType::WORD); - $nameTokens[] = $lexer->getTokenUnderCursor(); + $nameNodes[] = new ImportedNameNode( + rangeInSource: $lexer->getCursorRange(), + value: VariableName::from($lexer->getBuffer()) + ); $lexer->skipSpaceAndComments(); if ($lexer->probe(TokenType::SYMBOL_COMMA)) { @@ -112,15 +115,7 @@ private function parseNames(Lexer $lexer): ImportedNameNodes $end = $lexer->getEndPosition(); try { - return new ImportedNameNodes( - ...array_map( - static fn (Token $nameToken) => new ImportedNameNode( - rangeInSource: $nameToken->rangeInSource, - value: VariableName::from($nameToken->value) - ), - $nameTokens - ) - ); + return new ImportedNameNodes(...$nameNodes); } catch (InvalidImportedNameNodes $e) { throw ImportCouldNotBeParsed::becauseOfInvalidImportedNameNodes( cause: $e, @@ -128,4 +123,14 @@ private function parseNames(Lexer $lexer): ImportedNameNodes ); } } + + public function parseName(Lexer $lexer): ImportedNameNode + { + $lexer->read(TokenType::WORD); + + return new ImportedNameNode( + rangeInSource: $lexer->getCursorRange(), + value: VariableName::from($lexer->getBuffer()) + ); + } } diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index 3c6f4f9..0799763 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -28,10 +28,10 
@@ use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Language\Util\DebugHelper; +use PackageFactory\ComponentEngine\Parser\Source\Range; final class IntegerLiteralParser { @@ -53,21 +53,20 @@ public function parse(Lexer $lexer): IntegerLiteralNode { try { $lexer->readOneOf(self::$INTEGER_TOKEN_TYPES); - $token = $lexer->getTokenUnderCursor(); return new IntegerLiteralNode( - rangeInSource: $token->rangeInSource, - format: $this->getIntegerFormatFromToken($token), - value: $token->value + rangeInSource: $lexer->getCursorRange(), + format: $this->getIntegerFormatFromToken($lexer->getTokenTypeUnderCursor()), + value: $lexer->getBuffer() ); } catch (LexerException $e) { throw IntegerLiteralCouldNotBeParsed::becauseOfLexerException($e); } } - private function getIntegerFormatFromToken(Token $token): IntegerFormat + private function getIntegerFormatFromToken(TokenType $tokenType): IntegerFormat { - return match ($token->type) { + return match ($tokenType) { TokenType::INTEGER_BINARY => IntegerFormat::BINARY, TokenType::INTEGER_OCTAL => IntegerFormat::OCTAL, TokenType::INTEGER_DECIMAL => IntegerFormat::DECIMAL, @@ -75,7 +74,7 @@ private function getIntegerFormatFromToken(Token $token): IntegerFormat default => throw new LogicException( sprintf( 'Expected %s to be one of %s', - $token->type->value, + $tokenType->value, DebugHelper::describeTokenTypes($this->INTEGER_TOKEN_TYPES) ) ) diff --git a/src/Language/Parser/NullLiteral/NullLiteralParser.php b/src/Language/Parser/NullLiteral/NullLiteralParser.php index 8bb526a..8ea36d8 100644 --- 
a/src/Language/Parser/NullLiteral/NullLiteralParser.php +++ b/src/Language/Parser/NullLiteral/NullLiteralParser.php @@ -34,10 +34,9 @@ final class NullLiteralParser public function parse(Lexer $lexer): NullLiteralNode { $lexer->read(TokenType::KEYWORD_NULL); - $token = $lexer->getTokenUnderCursor(); return new NullLiteralNode( - rangeInSource: $token->rangeInSource + rangeInSource: $lexer->getCursorRange() ); } } diff --git a/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php b/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php index 18aef6b..50646a2 100644 --- a/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php +++ b/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php @@ -39,25 +39,31 @@ final class PropertyDeclarationParser public function parse(Lexer $lexer): PropertyDeclarationNode { - $lexer->read(TokenType::WORD); - $propertyNameToken = $lexer->getTokenUnderCursor(); + $name = $this->parsePropertyName($lexer); $lexer->read(TokenType::SYMBOL_COLON); $lexer->skipSpace(); $this->typeReferenceParser ??= TypeReferenceParser::singleton(); - $typeReferenceNode = $this->typeReferenceParser->parse($lexer); + $type = $this->typeReferenceParser->parse($lexer); return new PropertyDeclarationNode( rangeInSource: Range::from( - $propertyNameToken->rangeInSource->start, - $typeReferenceNode->rangeInSource->end - ), - name: new PropertyNameNode( - rangeInSource: $propertyNameToken->rangeInSource, - value: PropertyName::from($propertyNameToken->value) + $name->rangeInSource->start, + $type->rangeInSource->end ), - type: $typeReferenceNode + name: $name, + type: $type + ); + } + + public function parsePropertyName(Lexer $lexer): PropertyNameNode + { + $lexer->read(TokenType::WORD); + + return new PropertyNameNode( + rangeInSource: $lexer->getCursorRange(), + value: PropertyName::from($lexer->getBuffer()) ); } } diff --git a/src/Language/Parser/StringLiteral/StringLiteralParser.php 
b/src/Language/Parser/StringLiteral/StringLiteralParser.php index 8f4b105..701166d 100644 --- a/src/Language/Parser/StringLiteral/StringLiteralParser.php +++ b/src/Language/Parser/StringLiteral/StringLiteralParser.php @@ -40,11 +40,11 @@ public function parse(Lexer $lexer): StringLiteralNode $value = ''; while (!$lexer->peek(TokenType::STRING_LITERAL_DELIMITER)) { if ($lexer->probe(TokenType::STRING_LITERAL_CONTENT)) { - $value = $lexer->getTokenUnderCursor()->value; + $value = $lexer->getBuffer(); } if ($lexer->probe(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER)) { - $value = $lexer->getTokenUnderCursor()->value; + $value = $lexer->getBuffer(); } break; } diff --git a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php index b6a6a47..fa1aea6 100644 --- a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php +++ b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php @@ -58,14 +58,14 @@ public function parse(Lexer $lexer): StructDeclarationNode private function parseStructName(Lexer $lexer): StructNameNode { $lexer->read(TokenType::WORD); - $structNameToken = $lexer->getTokenUnderCursor(); + $structNameNode = new StructNameNode( + rangeInSource: $lexer->getCursorRange(), + value: StructName::from($lexer->getBuffer()) + ); $lexer->skipSpaceAndComments(); - return new StructNameNode( - rangeInSource: $structNameToken->rangeInSource, - value: StructName::from($structNameToken->value) - ); + return $structNameNode; } public function parsePropertyDeclarations(Lexer $lexer): PropertyDeclarationNodes diff --git a/src/Language/Parser/Tag/TagParser.php b/src/Language/Parser/Tag/TagParser.php index 6895332..4e5f823 100644 --- a/src/Language/Parser/Tag/TagParser.php +++ b/src/Language/Parser/Tag/TagParser.php @@ -100,14 +100,17 @@ public function parse(Lexer $lexer): TagNode private function parseName(Lexer $lexer): TagNameNode { $lexer->read(TokenType::WORD); - 
$tagNameToken = $lexer->getTokenUnderCursor(); + $tagNameNode = new TagNameNode( + rangeInSource: Range::from( + $lexer->getStartPosition(), + $lexer->getEndPosition() + ), + value: TagName::from($lexer->getBuffer()) + ); $lexer->skipSpace(); - return new TagNameNode( - rangeInSource: $tagNameToken->rangeInSource, - value: TagName::from($tagNameToken->value) - ); + return $tagNameNode; } private function parseAttributes(Lexer $lexer): AttributeNodes @@ -140,11 +143,10 @@ private function parseAttribute(Lexer $lexer): AttributeNode private function parseAttributeName(Lexer $lexer): AttributeNameNode { $lexer->read(TokenType::WORD); - $attributeNameToken = $lexer->getTokenUnderCursor(); return new AttributeNameNode( - rangeInSource: $attributeNameToken->rangeInSource, - value: AttributeName::from($attributeNameToken->value) + rangeInSource: $lexer->getCursorRange(), + value: AttributeName::from($lexer->getBuffer()) ); } @@ -220,15 +222,15 @@ private function readClosingTagName(Lexer $lexer, TagName $expectedName): void $start = $lexer->getStartPosition(); $lexer->read(TokenType::WORD); - $closingNameToken = $lexer->getTokenUnderCursor(); + $closingName = $lexer->getBuffer(); $lexer->read(TokenType::BRACKET_ANGLE_CLOSE); $end = $lexer->getEndPosition(); - if ($closingNameToken->value !== $expectedName->value) { + if ($closingName !== $expectedName->value) { throw TagCouldNotBeParsed::becauseOfClosingTagNameMismatch( expectedTagName: $expectedName, - actualTagName: $closingNameToken->value, + actualTagName: $closingName, affectedRangeInSource: Range::from($start, $end) ); } diff --git a/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php b/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php index 0dee8d6..9757763 100644 --- a/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php +++ b/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php @@ -100,11 +100,10 @@ public function parseSegments(Lexer $lexer): TemplateLiteralSegments public 
function parseStringSegment(Lexer $lexer): TemplateLiteralStringSegmentNode { $lexer->read(TokenType::TEMPLATE_LITERAL_CONTENT); - $stringToken = $lexer->getTokenUnderCursor(); return new TemplateLiteralStringSegmentNode( - rangeInSource: $stringToken->rangeInSource, - value: $stringToken->value + rangeInSource: $lexer->getCursorRange(), + value: $lexer->getBuffer() ); } diff --git a/src/Language/Parser/Text/TextParser.php b/src/Language/Parser/Text/TextParser.php index 3497dff..e405eab 100644 --- a/src/Language/Parser/Text/TextParser.php +++ b/src/Language/Parser/Text/TextParser.php @@ -85,7 +85,7 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo $hasTrailingSpace = false; $trailingSpaceContainsLineBreaks = false; } - $value .= $lexer->getTokenUnderCursor()->value; + $value .= $lexer->getBuffer(); continue; } diff --git a/src/Language/Parser/TypeReference/TypeReferenceParser.php b/src/Language/Parser/TypeReference/TypeReferenceParser.php index c11afab..a1075fb 100644 --- a/src/Language/Parser/TypeReference/TypeReferenceParser.php +++ b/src/Language/Parser/TypeReference/TypeReferenceParser.php @@ -83,11 +83,10 @@ public function parseTypeName(Lexer $lexer): TypeNameNode { $lexer->read(TokenType::WORD); $this->start ??= $lexer->getStartPosition(); - $typeNameToken = $lexer->getTokenUnderCursor(); return new TypeNameNode( - rangeInSource: $typeNameToken->rangeInSource, - value: TypeName::from($typeNameToken->value) + rangeInSource: $lexer->getCursorRange(), + value: TypeName::from($lexer->getBuffer()) ); } diff --git a/src/Language/Parser/ValueReference/ValueReferenceParser.php b/src/Language/Parser/ValueReference/ValueReferenceParser.php index d00e891..49aef07 100644 --- a/src/Language/Parser/ValueReference/ValueReferenceParser.php +++ b/src/Language/Parser/ValueReference/ValueReferenceParser.php @@ -35,11 +35,10 @@ final class ValueReferenceParser public function parse(Lexer $lexer): ValueReferenceNode { 
$lexer->read(TokenType::WORD); - $token = $lexer->getTokenUnderCursor(); return new ValueReferenceNode( - rangeInSource: $token->rangeInSource, - name: VariableName::from($token->value) + rangeInSource: $lexer->getCursorRange(), + name: VariableName::from($lexer->getBuffer()) ); } } diff --git a/src/Language/Util/DebugHelper.php b/src/Language/Util/DebugHelper.php index 7c45808..05c3a45 100644 --- a/src/Language/Util/DebugHelper.php +++ b/src/Language/Util/DebugHelper.php @@ -33,7 +33,6 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralStringSegmentNode; use PackageFactory\ComponentEngine\Language\AST\Node\TernaryOperation\TernaryOperationNode; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; -use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; @@ -127,11 +126,6 @@ public static function describeTokenTypes(TokenTypes $tokenTypes): string )) . ' or ' . self::describeTokenType($trailingItem); } - public static function describeToken(Token $token): string - { - return sprintf('%s ("%s")', $token->type->value, $token->value); - } - public static function printASTNode(Node $node, string $indentation = ''): string { return $indentation . 
match ($node::class) { diff --git a/src/Parser/Source/Position.php b/src/Parser/Source/Position.php index e7e1b39..bcd4206 100644 --- a/src/Parser/Source/Position.php +++ b/src/Parser/Source/Position.php @@ -37,13 +37,18 @@ public static function zero(): self return self::$zero ??= new self(0, 0); } + public static function from(int $lineNumber, int $columnNumber): self + { + return new self($lineNumber, $columnNumber); + } + public function toDebugString(): string { return sprintf('line %s, column %s', $this->lineNumber, $this->columnNumber); } - public function toRange(): Range + public function toRange(?Position $endPosition = null): Range { - return Range::from($this, $this); + return Range::from($this, $endPosition ?? $this); } } diff --git a/test/Unit/Language/Lexer/LexerTest.php b/test/Unit/Language/Lexer/LexerTest.php index da12dfc..9bb0cb1 100644 --- a/test/Unit/Language/Lexer/LexerTest.php +++ b/test/Unit/Language/Lexer/LexerTest.php @@ -24,7 +24,6 @@ use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; use PackageFactory\ComponentEngine\Parser\Source\Position; @@ -33,16 +32,43 @@ final class LexerTest extends TestCase { - /** - * @param array{int,int} $startAsArray - * @param array{int,int} $endAsArray - * @return Range - */ - protected static function range(array $startAsArray, array $endAsArray): Range - { - return Range::from( - new Position(...$startAsArray), - new Position(...$endAsArray) + private Lexer $lexer; + + protected function assertLexerState( + Position $startPosition, + Position $endPosition, + TokenType $tokenTypeUnderCursor, + string $buffer, + bool $isEnd + ): void { + $this->assertEquals( + $startPosition, + $this->lexer->getStartPosition(), + 'Failed asserting that 
start position of lexer equals' + ); + + $this->assertEquals( + $endPosition, + $this->lexer->getEndPosition(), + 'Failed asserting that end position of lexer equals' + ); + + $this->assertEquals( + $tokenTypeUnderCursor, + $this->lexer->getTokenTypeUnderCursor(), + 'Failed asserting that token type under cursor of lexer equals' + ); + + $this->assertEquals( + $buffer, + $this->lexer->getBuffer(), + 'Failed asserting that buffer of lexer equals' + ); + + $this->assertEquals( + $isEnd, + $this->lexer->isEnd(), + 'Failed asserting that isEnd of lexer equals' ); } @@ -221,31 +247,15 @@ public static function singleTokenExamples(): iterable */ public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, TokenType $expectedTokenType): void { - $lexer = new Lexer($source); - $lexer->read($expectedTokenType); - - $this->assertEquals( - $expectedTokenType, - $lexer->getTokenTypeUnderCursor() - ); - - $this->assertEquals( - new Position(0, 0), - $lexer->getStartPosition() - ); - - $this->assertEquals( - new Position(0, \mb_strlen($source) - 1), - $lexer->getEndPosition() - ); - - $this->assertEquals( - new Token( - rangeInSource: self::range([0, 0], [0, \mb_strlen($source) - 1]), - type: $expectedTokenType, - value: $source - ), - $lexer->getTokenUnderCursor() + $this->lexer = new Lexer($source); + $this->lexer->read($expectedTokenType); + + $this->assertLexerState( + startPosition: Position::from(0, 0), + endPosition: Position::from(0, \mb_strlen($source) - 1), + tokenTypeUnderCursor: $expectedTokenType, + buffer: $source, + isEnd: true ); } @@ -258,45 +268,29 @@ public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, TokenTyp */ public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, TokenType $expectedTokenType): void { - $lexer = new Lexer($source); - $lexer->readOneOf(TokenTypes::from($expectedTokenType)); - - $this->assertEquals( - $expectedTokenType, - $lexer->getTokenTypeUnderCursor() - ); - - $this->assertEquals( - 
new Position(0, 0), - $lexer->getStartPosition() - ); - - $this->assertEquals( - new Position(0, \mb_strlen($source) - 1), - $lexer->getEndPosition() - ); - - $this->assertEquals( - new Token( - rangeInSource: self::range([0, 0], [0, \mb_strlen($source) - 1]), - type: $expectedTokenType, - value: $source - ), - $lexer->getTokenUnderCursor() + $this->lexer = new Lexer($source); + $this->lexer->readOneOf(TokenTypes::from($expectedTokenType)); + + $this->assertLexerState( + startPosition: Position::from(0, 0), + endPosition: Position::from(0, \mb_strlen($source) - 1), + tokenTypeUnderCursor: $expectedTokenType, + buffer: $source, + isEnd: true ); } /** - * @return iterable + * @return iterable */ public static function multipleTokensExamples(): iterable { yield ($source = "# This is a comment\n# This is also a comment") => [ $source, TokenTypes::from(TokenType::COMMENT, TokenType::END_OF_LINE), - new Token(self::range([0, 0], [0, 18]), TokenType::COMMENT, '# This is a comment'), - new Token(self::range([0, 19], [0, 19]), TokenType::END_OF_LINE, "\n"), - new Token(self::range([1, 0], [1, 23]), TokenType::COMMENT, '# This is also a comment') + [[0, 0], [0, 18], TokenType::COMMENT, '# This is a comment'], + [[0, 19], [0, 19], TokenType::END_OF_LINE, "\n"], + [[1, 0], [1, 23], TokenType::COMMENT, '# This is also a comment'], ]; yield ($source = "1765224, -0xAB89CD, true\nnull") => [ @@ -311,16 +305,16 @@ public static function multipleTokensExamples(): iterable TokenType::KEYWORD_TRUE, TokenType::KEYWORD_NULL ), - new Token(self::range([0, 0], [0, 6]), TokenType::INTEGER_DECIMAL, '1765224'), - new Token(self::range([0, 7], [0, 7]), TokenType::SYMBOL_COMMA, ','), - new Token(self::range([0, 8], [0, 8]), TokenType::SPACE, ' '), - new Token(self::range([0, 9], [0, 9]), TokenType::SYMBOL_DASH, '-'), - new Token(self::range([0, 10], [0, 17]), TokenType::INTEGER_HEXADECIMAL, '0xAB89CD'), - new Token(self::range([0, 18], [0, 18]), TokenType::SYMBOL_COMMA, ','), - new 
Token(self::range([0, 19], [0, 19]), TokenType::SPACE, ' '), - new Token(self::range([0, 20], [0, 23]), TokenType::KEYWORD_TRUE, 'true'), - new Token(self::range([0, 24], [0, 24]), TokenType::END_OF_LINE, "\n"), - new Token(self::range([1, 0], [1, 3]), TokenType::KEYWORD_NULL, 'null') + [[0, 0], [0, 6], TokenType::INTEGER_DECIMAL, '1765224'], + [[0, 7], [0, 7], TokenType::SYMBOL_COMMA, ','], + [[0, 8], [0, 8], TokenType::SPACE, ' '], + [[0, 9], [0, 9], TokenType::SYMBOL_DASH, '-'], + [[0, 10], [0, 17], TokenType::INTEGER_HEXADECIMAL, '0xAB89CD'], + [[0, 18], [0, 18], TokenType::SYMBOL_COMMA, ','], + [[0, 19], [0, 19], TokenType::SPACE, ' '], + [[0, 20], [0, 23], TokenType::KEYWORD_TRUE, 'true'], + [[0, 24], [0, 24], TokenType::END_OF_LINE, "\n"], + [[1, 0], [1, 3], TokenType::KEYWORD_NULL, 'null'], ]; yield ($source = '0b100101 892837 0xFFAAEE 0o75374') => [ @@ -332,13 +326,13 @@ public static function multipleTokensExamples(): iterable TokenType::INTEGER_DECIMAL, TokenType::SPACE ), - new Token(self::range([0, 0], [0, 7]), TokenType::INTEGER_BINARY, '0b100101'), - new Token(self::range([0, 8], [0, 8]), TokenType::SPACE, ' '), - new Token(self::range([0, 9], [0, 14]), TokenType::INTEGER_DECIMAL, '892837'), - new Token(self::range([0, 15], [0, 15]), TokenType::SPACE, ' '), - new Token(self::range([0, 16], [0, 23]), TokenType::INTEGER_HEXADECIMAL, '0xFFAAEE'), - new Token(self::range([0, 24], [0, 24]), TokenType::SPACE, ' '), - new Token(self::range([0, 25], [0, 31]), TokenType::INTEGER_OCTAL, '0o75374'), + [[0, 0], [0, 7], TokenType::INTEGER_BINARY, '0b100101'], + [[0, 8], [0, 8], TokenType::SPACE, ' '], + [[0, 9], [0, 14], TokenType::INTEGER_DECIMAL, '892837'], + [[0, 15], [0, 15], TokenType::SPACE, ' '], + [[0, 16], [0, 23], TokenType::INTEGER_HEXADECIMAL, '0xFFAAEE'], + [[0, 24], [0, 24], TokenType::SPACE, ' '], + [[0, 25], [0, 31], TokenType::INTEGER_OCTAL, '0o75374'], ]; yield ($source = '"This is a string literal with \\n escapes \\xB1 \\u5FA9 \\u{1343E}!"') 
=> [ @@ -351,17 +345,17 @@ public static function multipleTokensExamples(): iterable TokenType::ESCAPE_SEQUENCE_UNICODE, TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT ), - new Token(self::range([0, 0], [0, 0]), TokenType::STRING_LITERAL_DELIMITER, '"'), - new Token(self::range([0, 1], [0, 30]), TokenType::STRING_LITERAL_CONTENT, 'This is a string literal with '), - new Token(self::range([0, 31], [0, 32]), TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'), - new Token(self::range([0, 33], [0, 41]), TokenType::STRING_LITERAL_CONTENT, ' escapes '), - new Token(self::range([0, 42], [0, 45]), TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, '\\xB1'), - new Token(self::range([0, 46], [0, 46]), TokenType::STRING_LITERAL_CONTENT, ' '), - new Token(self::range([0, 47], [0, 52]), TokenType::ESCAPE_SEQUENCE_UNICODE, '\\u5FA9'), - new Token(self::range([0, 53], [0, 53]), TokenType::STRING_LITERAL_CONTENT, ' '), - new Token(self::range([0, 54], [0, 62]), TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{1343E}'), - new Token(self::range([0, 63], [0, 63]), TokenType::STRING_LITERAL_CONTENT, '!'), - new Token(self::range([0, 64], [0, 64]), TokenType::STRING_LITERAL_DELIMITER, '"') + [[0, 0], [0, 0], TokenType::STRING_LITERAL_DELIMITER, '"'], + [[0, 1], [0, 30], TokenType::STRING_LITERAL_CONTENT, 'This is a string literal with '], + [[0, 31], [0, 32], TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'], + [[0, 33], [0, 41], TokenType::STRING_LITERAL_CONTENT, ' escapes '], + [[0, 42], [0, 45], TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, '\\xB1'], + [[0, 46], [0, 46], TokenType::STRING_LITERAL_CONTENT, ' '], + [[0, 47], [0, 52], TokenType::ESCAPE_SEQUENCE_UNICODE, '\\u5FA9'], + [[0, 53], [0, 53], TokenType::STRING_LITERAL_CONTENT, ' '], + [[0, 54], [0, 62], TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{1343E}'], + [[0, 63], [0, 63], TokenType::STRING_LITERAL_CONTENT, '!'], + [[0, 64], [0, 64], TokenType::STRING_LITERAL_DELIMITER, '"'] ]; $source = <<'), - new Token(self::range([0, 11], 
[0, 11]), TokenType::END_OF_LINE, "\n"), - new Token(self::range([1, 0], [1, 3]), TokenType::SPACE, ' '), - new Token(self::range([1, 4], [1, 4]), TokenType::BRACKET_ANGLE_OPEN, '<'), - new Token(self::range([1, 5], [1, 6]), TokenType::WORD, 'my'), - new Token(self::range([1, 7], [1, 7]), TokenType::SYMBOL_DASH, '-'), - new Token(self::range([1, 8], [1, 14]), TokenType::WORD, 'element'), - new Token(self::range([1, 15], [1, 15]), TokenType::SYMBOL_SLASH_FORWARD, '/'), - new Token(self::range([1, 16], [1, 16]), TokenType::BRACKET_ANGLE_CLOSE, '>'), - new Token(self::range([1, 17], [1, 17]), TokenType::END_OF_LINE, "\n"), - new Token(self::range([2, 0], [2, 3]), TokenType::SPACE, ' '), - new Token(self::range([2, 4], [2, 4]), TokenType::BRACKET_ANGLE_OPEN, '<'), - new Token(self::range([2, 5], [2, 7]), TokenType::WORD, 'div'), - new Token(self::range([2, 8], [2, 8]), TokenType::SPACE, ' '), - new Token(self::range([2, 9], [2, 13]), TokenType::WORD, 'class'), - new Token(self::range([2, 14], [2, 14]), TokenType::SYMBOL_EQUALS, '='), - new Token(self::range([2, 15], [2, 15]), TokenType::BRACKET_CURLY_OPEN, '{'), - new Token(self::range([2, 16], [2, 16]), TokenType::BRACKET_CURLY_CLOSE, '}'), - new Token(self::range([2, 17], [2, 17]), TokenType::SPACE, ' '), - new Token(self::range([2, 18], [2, 20]), TokenType::WORD, 'foo'), - new Token(self::range([2, 21], [2, 21]), TokenType::SYMBOL_COLON, ':'), - new Token(self::range([2, 22], [2, 24]), TokenType::WORD, 'bar'), - new Token(self::range([2, 25], [2, 25]), TokenType::BRACKET_ANGLE_CLOSE, '>'), - new Token(self::range([2, 26], [2, 26]), TokenType::END_OF_LINE, "\n"), - new Token(self::range([3, 0], [3, 3]), TokenType::SPACE, ' '), - new Token(self::range([3, 4], [3, 4]), TokenType::BRACKET_ANGLE_OPEN, '<'), - new Token(self::range([3, 5], [3, 5]), TokenType::SYMBOL_SLASH_FORWARD, '/'), - new Token(self::range([3, 6], [3, 8]), TokenType::WORD, 'div'), - new Token(self::range([3, 9], [3, 9]), 
TokenType::BRACKET_ANGLE_CLOSE, '>'), - new Token(self::range([3, 10], [3, 10]), TokenType::END_OF_LINE, "\n"), - new Token(self::range([4, 0], [4, 0]), TokenType::BRACKET_ANGLE_OPEN, '<'), - new Token(self::range([4, 1], [4, 1]), TokenType::SYMBOL_SLASH_FORWARD, '/'), - new Token(self::range([4, 2], [4, 2]), TokenType::WORD, 'a'), - new Token(self::range([4, 3], [4, 3]), TokenType::BRACKET_ANGLE_CLOSE, '>'), + [[0, 0], [0, 0], TokenType::BRACKET_ANGLE_OPEN, '<'], + [[0, 1], [0, 1], TokenType::WORD, 'a'], + [[0, 2], [0, 2], TokenType::SPACE, ' '], + [[0, 3], [0, 6], TokenType::WORD, 'href'], + [[0, 7], [0, 7], TokenType::SYMBOL_EQUALS, '='], + [[0, 8], [0, 8], TokenType::STRING_LITERAL_DELIMITER, '"'], + [[0, 9], [0, 9], TokenType::STRING_LITERAL_DELIMITER, '"'], + [[0, 10], [0, 10], TokenType::BRACKET_ANGLE_CLOSE, '>'], + [[0, 11], [0, 11], TokenType::END_OF_LINE, "\n"], + [[1, 0], [1, 3], TokenType::SPACE, ' '], + [[1, 4], [1, 4], TokenType::BRACKET_ANGLE_OPEN, '<'], + [[1, 5], [1, 6], TokenType::WORD, 'my'], + [[1, 7], [1, 7], TokenType::SYMBOL_DASH, '-'], + [[1, 8], [1, 14], TokenType::WORD, 'element'], + [[1, 15], [1, 15], TokenType::SYMBOL_SLASH_FORWARD, '/'], + [[1, 16], [1, 16], TokenType::BRACKET_ANGLE_CLOSE, '>'], + [[1, 17], [1, 17], TokenType::END_OF_LINE, "\n"], + [[2, 0], [2, 3], TokenType::SPACE, ' '], + [[2, 4], [2, 4], TokenType::BRACKET_ANGLE_OPEN, '<'], + [[2, 5], [2, 7], TokenType::WORD, 'div'], + [[2, 8], [2, 8], TokenType::SPACE, ' '], + [[2, 9], [2, 13], TokenType::WORD, 'class'], + [[2, 14], [2, 14], TokenType::SYMBOL_EQUALS, '='], + [[2, 15], [2, 15], TokenType::BRACKET_CURLY_OPEN, '{'], + [[2, 16], [2, 16], TokenType::BRACKET_CURLY_CLOSE, '}'], + [[2, 17], [2, 17], TokenType::SPACE, ' '], + [[2, 18], [2, 20], TokenType::WORD, 'foo'], + [[2, 21], [2, 21], TokenType::SYMBOL_COLON, ':'], + [[2, 22], [2, 24], TokenType::WORD, 'bar'], + [[2, 25], [2, 25], TokenType::BRACKET_ANGLE_CLOSE, '>'], + [[2, 26], [2, 26], TokenType::END_OF_LINE, "\n"], 
+ [[3, 0], [3, 3], TokenType::SPACE, ' '], + [[3, 4], [3, 4], TokenType::BRACKET_ANGLE_OPEN, '<'], + [[3, 5], [3, 5], TokenType::SYMBOL_SLASH_FORWARD, '/'], + [[3, 6], [3, 8], TokenType::WORD, 'div'], + [[3, 9], [3, 9], TokenType::BRACKET_ANGLE_CLOSE, '>'], + [[3, 10], [3, 10], TokenType::END_OF_LINE, "\n"], + [[4, 0], [4, 0], TokenType::BRACKET_ANGLE_OPEN, '<'], + [[4, 1], [4, 1], TokenType::SYMBOL_SLASH_FORWARD, '/'], + [[4, 2], [4, 2], TokenType::WORD, 'a'], + [[4, 3], [4, 3], TokenType::BRACKET_ANGLE_CLOSE, '>'], ]; $source = <<'), - new Token(self::range([1, 33], [1, 39]), TokenType::TEXT, 'inside.'), + [[0, 0], [0, 30], TokenType::TEXT, 'ThisIsSomeText-with-expressions'], + [[0, 31], [0, 31], TokenType::BRACKET_CURLY_OPEN, '{'], + [[0, 32], [0, 32], TokenType::BRACKET_CURLY_CLOSE, '}'], + [[0, 33], [0, 33], TokenType::END_OF_LINE, "\n"], + [[1, 0], [1, 11], TokenType::TEXT, 'line-breaks,'], + [[1, 12], [1, 14], TokenType::SPACE, ' '], + [[1, 15], [1, 20], TokenType::TEXT, 'spaces'], + [[1, 21], [1, 23], TokenType::SPACE, ' '], + [[1, 24], [1, 30], TokenType::TEXT, 'andTags'], + [[1, 31], [1, 31], TokenType::BRACKET_ANGLE_OPEN, '<'], + [[1, 32], [1, 32], TokenType::BRACKET_ANGLE_CLOSE, '>'], + [[1, 33], [1, 39], TokenType::TEXT, 'inside.'], ]; } @@ -508,23 +502,27 @@ public static function multipleTokensExamples(): iterable * @dataProvider multipleTokensExamples * @test * @param string $source - * @param Token ...$expectedTokens + * @param array{array{int,int},array{int,int},TokenType,string} ...$expectedLexerStates * @return void */ public function testReadOneOfWithMultipleTokenTypes( string $source, TokenTypes $tokenTypes, - Token ...$expectedTokens + array ...$expectedLexerStates ): void { - $lexer = new Lexer($source); - - $actualTokens = []; - foreach ($expectedTokens as $token) { - $lexer->readOneOf($tokenTypes); - $actualTokens[] = $lexer->getTokenUnderCursor(); + $this->lexer = new Lexer($source); + + foreach ($expectedLexerStates as $i => 
$expectedLexerState) { + $this->lexer->readOneOf($tokenTypes); + + $this->assertLexerState( + startPosition: Position::from(...$expectedLexerState[0]), + endPosition: Position::from(...$expectedLexerState[1]), + tokenTypeUnderCursor: $expectedLexerState[2], + buffer: $expectedLexerState[3], + isEnd: $i === count($expectedLexerStates) - 1 + ); } - - $this->assertEquals($expectedTokens, $actualTokens); } /** @@ -540,7 +538,10 @@ public static function failingSingleTokenExamples(): iterable yield sprintf('%s: %s', $type->value, $source) => [ $source, $type, - self::range([0, 0], [0, \mb_strlen($unexpectedCharacterSequence) - 1]), + Range::from( + Position::from(0, 0), + Position::from(0, \mb_strlen($unexpectedCharacterSequence) - 1), + ), $unexpectedCharacterSequence ]; }; @@ -666,8 +667,8 @@ public function throwsIfCharacterSequenceDoesNotMatchSingleTokenType( ): void { $this->assertThrowsLexerException( function () use ($source, $expectedTokenType) { - $lexer = new Lexer($source); - $lexer->read($expectedTokenType); + $this->lexer = new Lexer($source); + $this->lexer->read($expectedTokenType); }, LexerException::becauseOfUnexpectedCharacterSequence( expectedTokenTypes: TokenTypes::from($expectedTokenType), @@ -688,7 +689,10 @@ public static function failingMultipleTokensExamples(): iterable 3, LexerException::becauseOfUnexpectedCharacterSequence( expectedTokenTypes: $tokenTypes, - affectedRangeInSource: self::range([1, 0], [1, 0]), + affectedRangeInSource: Range::from( + Position::from(1, 0), + Position::from(1, 0) + ), actualCharacterSequence: 'T' ) ]; @@ -711,10 +715,10 @@ public function throwsIfCharacterSequenceDoesNotMatchMultipleTokenTypes( ): void { $this->assertThrowsLexerException( function () use ($source, $tokenTypes, $numberOfReadOperations) { - $lexer = new Lexer($source); + $this->lexer = new Lexer($source); foreach(range(0, $numberOfReadOperations) as $i) { - $lexer->readOneOf($tokenTypes); + $this->lexer->readOneOf($tokenTypes); } }, 
$expectedLexerException @@ -728,12 +732,15 @@ public function throwsIfSourceEndsUnexpectedlyWhileReadingASingleTokenType(): vo { $this->assertThrowsLexerException( function () { - $lexer = new Lexer(''); - $lexer->read(TokenType::KEYWORD_NULL); + $this->lexer = new Lexer(''); + $this->lexer->read(TokenType::KEYWORD_NULL); }, LexerException::becauseOfUnexpectedEndOfSource( expectedTokenTypes: TokenTypes::from(TokenType::KEYWORD_NULL), - affectedRangeInSource: self::range([0, 0], [0, 0]) + affectedRangeInSource: Range::from( + Position::from(0, 0), + Position::from(0, 0) + ) ) ); @@ -745,7 +752,10 @@ function () { }, LexerException::becauseOfUnexpectedEndOfSource( expectedTokenTypes: TokenTypes::from(TokenType::KEYWORD_NULL), - affectedRangeInSource: self::range([0, 0], [0, 4]) + affectedRangeInSource: Range::from( + Position::from(0, 0), + Position::from(0, 4) + ) ) ); } @@ -765,7 +775,10 @@ public static function multipleTokenTypeUnexpectedEndOfSourceExamples(): iterabl 1, LexerException::becauseOfUnexpectedEndOfSource( expectedTokenTypes: $tokenTypes, - affectedRangeInSource: self::range([0, 0], [0, 0]) + affectedRangeInSource: Range::from( + Position::from(0, 0), + Position::from(0, 0) + ) ) ]; @@ -779,7 +792,10 @@ public static function multipleTokenTypeUnexpectedEndOfSourceExamples(): iterabl 2, LexerException::becauseOfUnexpectedEndOfSource( expectedTokenTypes: $tokenTypes, - affectedRangeInSource: self::range([0, 6], [0, 6]) + affectedRangeInSource: Range::from( + Position::from(0, 6), + Position::from(0, 6) + ) ) ]; @@ -793,7 +809,10 @@ public static function multipleTokenTypeUnexpectedEndOfSourceExamples(): iterabl 3, LexerException::becauseOfUnexpectedEndOfSource( expectedTokenTypes: $tokenTypes, - affectedRangeInSource: self::range([0, 7], [0, 7]) + affectedRangeInSource: Range::from( + Position::from(0, 7), + Position::from(0, 7) + ) ) ]; } @@ -815,10 +834,10 @@ public function throwsIfSourceEndsUnexpectedlyWhileReadingMultipleTokenTypes( ): void { 
$this->assertThrowsLexerException( function () use ($source, $tokenTypes, $numberOfReadOperations) { - $lexer = new Lexer($source); + $this->lexer = new Lexer($source); foreach(range(0, $numberOfReadOperations) as $i) { - $lexer->readOneOf($tokenTypes); + $this->lexer->readOneOf($tokenTypes); } }, $expectedLexerException @@ -831,35 +850,33 @@ function () use ($source, $tokenTypes, $numberOfReadOperations) { public function skipsSpace(): void { // Single - $lexer = new Lexer('return ' . "\t\n\t" . ' 42'); - - $lexer->read(TokenType::KEYWORD_RETURN); - $lexer->skipSpace(); - $lexer->read(TokenType::INTEGER_DECIMAL); - - $this->assertEquals( - new Token( - rangeInSource: self::range([1, 4], [1, 5]), - type: TokenType::INTEGER_DECIMAL, - value: '42' - ), - $lexer->getTokenUnderCursor() + $this->lexer = new Lexer('return ' . "\t\n\t" . ' 42'); + + $this->lexer->read(TokenType::KEYWORD_RETURN); + $this->lexer->skipSpace(); + $this->lexer->read(TokenType::INTEGER_DECIMAL); + + $this->assertLexerState( + startPosition: Position::from(1, 4), + endPosition: Position::from(1, 5), + tokenTypeUnderCursor: TokenType::INTEGER_DECIMAL, + buffer: '42', + isEnd: true ); // Multiple - $lexer = new Lexer('return ' . "\t\n\t" . ' 42'); - - $lexer->readOneOf(TokenTypes::from(TokenType::KEYWORD_RETURN, TokenType::INTEGER_DECIMAL)); - $lexer->skipSpace(); - $lexer->readOneOf(TokenTypes::from(TokenType::KEYWORD_RETURN, TokenType::INTEGER_DECIMAL)); - - $this->assertEquals( - new Token( - rangeInSource: self::range([1, 4], [1, 5]), - type: TokenType::INTEGER_DECIMAL, - value: '42' - ), - $lexer->getTokenUnderCursor() + $this->lexer = new Lexer('return ' . "\t\n\t" . 
' 42'); + + $this->lexer->readOneOf(TokenTypes::from(TokenType::KEYWORD_RETURN, TokenType::INTEGER_DECIMAL)); + $this->lexer->skipSpace(); + $this->lexer->readOneOf(TokenTypes::from(TokenType::KEYWORD_RETURN, TokenType::INTEGER_DECIMAL)); + + $this->assertLexerState( + startPosition: Position::from(1, 4), + endPosition: Position::from(1, 5), + tokenTypeUnderCursor: TokenType::INTEGER_DECIMAL, + buffer: '42', + isEnd: true ); } @@ -879,42 +896,41 @@ public function skipsSpaceAndComments(): void EOF; // Single - $lexer = new Lexer($source); - - $lexer->read(TokenType::KEYWORD_IMPORT); - $lexer->skipSpaceAndComments(); - $lexer->read(TokenType::KEYWORD_EXPORT); - $lexer->skipSpaceAndComments(); - $lexer->read(TokenType::KEYWORD_COMPONENT); - - $this->assertEquals( - new Token( - rangeInSource: self::range([6, 4], [6, 12]), - type: TokenType::KEYWORD_COMPONENT, - value: 'component' - ), - $lexer->getTokenUnderCursor() + $this->lexer = new Lexer($source); + + $this->lexer->read(TokenType::KEYWORD_IMPORT); + $this->lexer->skipSpaceAndComments(); + $this->lexer->read(TokenType::KEYWORD_EXPORT); + $this->lexer->skipSpaceAndComments(); + $this->lexer->read(TokenType::KEYWORD_COMPONENT); + + $this->assertLexerState( + startPosition: Position::from(6, 4), + endPosition: Position::from(6, 12), + tokenTypeUnderCursor: TokenType::KEYWORD_COMPONENT, + buffer: 'component', + isEnd: true ); // Multiple - $lexer = new Lexer($source); - $lexer->readOneOf( + $this->lexer = new Lexer($source); + $this->lexer->readOneOf( TokenTypes::from( TokenType::KEYWORD_IMPORT, TokenType::KEYWORD_EXPORT, TokenType::KEYWORD_COMPONENT ) ); - $lexer->skipSpaceAndComments(); - $lexer->readOneOf( + $this->lexer->skipSpaceAndComments(); + $this->lexer->readOneOf( TokenTypes::from( TokenType::KEYWORD_IMPORT, TokenType::KEYWORD_EXPORT, TokenType::KEYWORD_COMPONENT ) ); - $lexer->skipSpaceAndComments(); - $lexer->readOneOf( + $this->lexer->skipSpaceAndComments(); + $this->lexer->readOneOf( TokenTypes::from( 
TokenType::KEYWORD_IMPORT, TokenType::KEYWORD_EXPORT, @@ -922,39 +938,12 @@ public function skipsSpaceAndComments(): void ) ); - $this->assertEquals( - new Token( - rangeInSource: self::range([6, 4], [6, 12]), - type: TokenType::KEYWORD_COMPONENT, - value: 'component' - ), - $lexer->getTokenUnderCursor() + $this->assertLexerState( + startPosition: Position::from(6, 4), + endPosition: Position::from(6, 12), + tokenTypeUnderCursor: TokenType::KEYWORD_COMPONENT, + buffer: 'component', + isEnd: true ); } - - /** - * @test - */ - public function tellsIfItHasEnded(): void - { - $lexer = new Lexer(''); - - $this->assertTrue($lexer->isEnd()); - - $lexer = new Lexer('return null'); - - $this->assertFalse($lexer->isEnd()); - - $lexer->read(TokenType::KEYWORD_RETURN); - - $this->assertFalse($lexer->isEnd()); - - $lexer->read(TokenType::SPACE); - - $this->assertFalse($lexer->isEnd()); - - $lexer->read(TokenType::KEYWORD_NULL); - - $this->assertTrue($lexer->isEnd()); - } } From 088306635bb82d070020ad0e2ed3f8b2ad647488 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 11 Aug 2023 16:01:54 +0200 Subject: [PATCH 07/19] TASK: Rename TokenType -> Rule --- src/Language/Lexer/Lexer.php | 88 +- src/Language/Lexer/LexerException.php | 10 +- src/Language/Lexer/Matcher/Matcher.php | 124 +-- .../{Token/TokenType.php => Rule/Rule.php} | 4 +- .../{Token/TokenTypes.php => Rule/Rules.php} | 12 +- .../BooleanLiteral/BooleanLiteralParser.php | 14 +- .../ComponentDeclarationParser.php | 26 +- .../EnumDeclaration/EnumDeclarationParser.php | 36 +- src/Language/Parser/Export/ExportParser.php | 22 +- .../Parser/Expression/ExpressionParser.php | 146 ++-- src/Language/Parser/Expression/Precedence.php | 44 +- src/Language/Parser/Import/ImportParser.php | 32 +- .../IntegerLiteral/IntegerLiteralParser.php | 30 +- src/Language/Parser/Match/MatchParser.php | 16 +- src/Language/Parser/Module/ModuleParser.php | 4 +- .../Parser/NullLiteral/NullLiteralParser.php | 4 +- .../PropertyDeclarationParser.php 
| 6 +- .../StringLiteral/StringLiteralParser.php | 12 +- .../StructDeclarationParser.php | 12 +- src/Language/Parser/Tag/TagParser.php | 52 +- .../TemplateLiteral/TemplateLiteralParser.php | 26 +- src/Language/Parser/Text/TextParser.php | 34 +- .../TypeReference/TypeReferenceParser.php | 12 +- .../ValueReference/ValueReferenceParser.php | 4 +- src/Language/Util/DebugHelper.php | 148 ++-- test/Unit/Language/Lexer/LexerTest.php | 816 +++++++++--------- .../Parser/Export/ExportParserTest.php | 12 +- .../IntegerLiteralParserTest.php | 24 +- 28 files changed, 885 insertions(+), 885 deletions(-) rename src/Language/Lexer/{Token/TokenType.php => Rule/Rule.php} (97%) rename src/Language/Lexer/{Token/TokenTypes.php => Rule/Rules.php} (81%) diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index 5e7651f..1af7e97 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -26,40 +26,40 @@ use PackageFactory\ComponentEngine\Language\Lexer\CharacterStream\CharacterStream; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; -use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Token; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; final class Lexer { - private readonly TokenTypes $TOKEN_TYPES_SPACE; - private readonly TokenTypes $TOKEN_TYPES_SPACE_AND_COMMENTS; + private readonly Rules $TOKEN_TYPES_SPACE; + private readonly Rules $TOKEN_TYPES_SPACE_AND_COMMENTS; private readonly CharacterStream $characterStream; private Position $startPosition; private int $offset = 0; 
private string $buffer = ''; - private ?TokenType $tokenTypeUnderCursor = null; + private ?Rule $tokenTypeUnderCursor = null; public function __construct(string $source) { - $this->TOKEN_TYPES_SPACE = TokenTypes::from( - TokenType::SPACE, - TokenType::END_OF_LINE + $this->TOKEN_TYPES_SPACE = Rules::from( + Rule::SPACE, + Rule::END_OF_LINE ); - $this->TOKEN_TYPES_SPACE_AND_COMMENTS = TokenTypes::from( - TokenType::SPACE, - TokenType::END_OF_LINE, - TokenType::COMMENT + $this->TOKEN_TYPES_SPACE_AND_COMMENTS = Rules::from( + Rule::SPACE, + Rule::END_OF_LINE, + Rule::COMMENT ); $this->characterStream = new CharacterStream($source); $this->startPosition = Position::zero(); } - public function getTokenTypeUnderCursor(): TokenType + public function getRuleUnderCursor(): Rule { assert($this->tokenTypeUnderCursor !== null); @@ -101,12 +101,12 @@ public function getCursorRange(): Range return $this->getStartPosition()->toRange($this->getEndPosition()); } - public function read(TokenType $tokenType): void + public function read(Rule $tokenType): void { if ($this->characterStream->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: TokenTypes::from($tokenType), + expectedRules: Rules::from($tokenType), affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() ); } @@ -117,7 +117,7 @@ public function read(TokenType $tokenType): void } throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: TokenTypes::from($tokenType), + expectedRules: Rules::from($tokenType), affectedRangeInSource: Range::from( $this->startPosition, $this->characterStream->getCurrentPosition() @@ -126,20 +126,20 @@ public function read(TokenType $tokenType): void ); } - public function readOneOf(TokenTypes $tokenTypes): void + public function readOneOf(Rules $tokenTypes): void { if ($this->characterStream->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: $tokenTypes, + expectedRules: 
$tokenTypes, affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() ); } - $foundTokenType = $this->extractOneOf($tokenTypes); - if ($foundTokenType === null) { + $foundRule = $this->extractOneOf($tokenTypes); + if ($foundRule === null) { throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: $tokenTypes, + expectedRules: $tokenTypes, affectedRangeInSource: Range::from( $this->startPosition, $this->characterStream->getPreviousPosition() @@ -148,10 +148,10 @@ public function readOneOf(TokenTypes $tokenTypes): void ); } - $this->tokenTypeUnderCursor = $foundTokenType; + $this->tokenTypeUnderCursor = $foundRule; } - public function probe(TokenType $tokenType): bool + public function probe(Rule $tokenType): bool { if ($this->characterStream->isEnd()) { @@ -169,7 +169,7 @@ public function probe(TokenType $tokenType): bool return false; } - public function probeOneOf(TokenTypes $tokenTypes): bool + public function probeOneOf(Rules $tokenTypes): bool { if ($this->characterStream->isEnd()) { return false; @@ -186,7 +186,7 @@ public function probeOneOf(TokenTypes $tokenTypes): bool return false; } - public function peek(TokenType $tokenType): bool + public function peek(Rule $tokenType): bool { if ($this->characterStream->isEnd()) { return false; @@ -199,24 +199,24 @@ public function peek(TokenType $tokenType): bool return $result; } - public function peekOneOf(TokenTypes $tokenTypes): ?TokenType + public function peekOneOf(Rules $tokenTypes): ?Rule { if ($this->characterStream->isEnd()) { return null; } $snapshot = $this->characterStream->makeSnapshot(); - $foundTokenType = $this->extractOneOf($tokenTypes); + $foundRule = $this->extractOneOf($tokenTypes); $this->characterStream->restoreSnapshot($snapshot); - return $foundTokenType; + return $foundRule; } - public function expect(TokenType $tokenType): void + public function expect(Rule $tokenType): void { if ($this->characterStream->isEnd()) { throw 
LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: TokenTypes::from($tokenType), + expectedRules: Rules::from($tokenType), affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() ); } @@ -224,7 +224,7 @@ public function expect(TokenType $tokenType): void $snapshot = $this->characterStream->makeSnapshot(); if ($this->extract($tokenType) === null) { throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: TokenTypes::from($tokenType), + expectedRules: Rules::from($tokenType), affectedRangeInSource: Range::from( $this->startPosition, $this->characterStream->getPreviousPosition() @@ -236,20 +236,20 @@ public function expect(TokenType $tokenType): void $this->characterStream->restoreSnapshot($snapshot); } - public function expectOneOf(TokenTypes $tokenTypes): TokenType + public function expectOneOf(Rules $tokenTypes): Rule { if ($this->characterStream->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: $tokenTypes, + expectedRules: $tokenTypes, affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() ); } $snapshot = $this->characterStream->makeSnapshot(); - $foundTokenType = $this->extractOneOf($tokenTypes); - if ($foundTokenType === null) { + $foundRule = $this->extractOneOf($tokenTypes); + if ($foundRule === null) { throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: $tokenTypes, + expectedRules: $tokenTypes, affectedRangeInSource: Range::from( $this->startPosition, $this->characterStream->getPreviousPosition() @@ -260,7 +260,7 @@ public function expectOneOf(TokenTypes $tokenTypes): TokenType $this->characterStream->restoreSnapshot($snapshot); - return $foundTokenType; + return $foundRule; } public function skipSpace(): void @@ -273,7 +273,7 @@ public function skipSpaceAndComments(): void $this->skipAnyOf($this->TOKEN_TYPES_SPACE_AND_COMMENTS); } - private function skipAnyOf(TokenTypes $tokenTypes): void + private 
function skipAnyOf(Rules $tokenTypes): void { while (true) { $character = $this->characterStream->current(); @@ -291,7 +291,7 @@ private function skipAnyOf(TokenTypes $tokenTypes): void } } - private function extract(TokenType $tokenType): ?TokenType + private function extract(Rule $tokenType): ?Rule { $this->startPosition = $this->characterStream->getCurrentPosition(); $this->offset = 0; @@ -315,7 +315,7 @@ private function extract(TokenType $tokenType): ?TokenType } } - private function extractOneOf(TokenTypes $tokenTypes): ?TokenType + private function extractOneOf(Rules $tokenTypes): ?Rule { $this->startPosition = $this->characterStream->getCurrentPosition(); $this->offset = 0; @@ -325,7 +325,7 @@ private function extractOneOf(TokenTypes $tokenTypes): ?TokenType while (count($tokenTypeCandidates)) { $character = $this->characterStream->current(); - $nextTokenTypeCandidates = []; + $nextRuleCandidates = []; foreach ($tokenTypeCandidates as $tokenType) { $result = Matcher::for($tokenType)->match($character, $this->offset); @@ -334,13 +334,13 @@ private function extractOneOf(TokenTypes $tokenTypes): ?TokenType } if ($result === Result::KEEP) { - $nextTokenTypeCandidates[] = $tokenType; + $nextRuleCandidates[] = $tokenType; } } $this->offset++; $this->buffer .= $character; - $tokenTypeCandidates = $nextTokenTypeCandidates; + $tokenTypeCandidates = $nextRuleCandidates; $this->characterStream->next(); } diff --git a/src/Language/Lexer/LexerException.php b/src/Language/Lexer/LexerException.php index 6b376d0..61f5a68 100644 --- a/src/Language/Lexer/LexerException.php +++ b/src/Language/Lexer/LexerException.php @@ -22,7 +22,7 @@ namespace PackageFactory\ComponentEngine\Language\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Util\DebugHelper; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -44,21 +44,21 @@ private function 
__construct( } public static function becauseOfUnexpectedEndOfSource( - TokenTypes $expectedTokenTypes, + Rules $expectedRules, Range $affectedRangeInSource ): self { return new self( code: 1691489789, message: sprintf( 'Source ended unexpectedly. Expected %s instead.', - DebugHelper::describeTokenTypes($expectedTokenTypes) + DebugHelper::describeRules($expectedRules) ), affectedRangeInSource: $affectedRangeInSource ); } public static function becauseOfUnexpectedCharacterSequence( - TokenTypes $expectedTokenTypes, + Rules $expectedRules, Range $affectedRangeInSource, string $actualCharacterSequence ): self { @@ -67,7 +67,7 @@ public static function becauseOfUnexpectedCharacterSequence( message: sprintf( 'Unexpected character sequence "%s" was encountered. Expected %s instead.', $actualCharacterSequence, - DebugHelper::describeTokenTypes($expectedTokenTypes) + DebugHelper::describeRules($expectedRules) ), affectedRangeInSource: $affectedRangeInSource ); diff --git a/src/Language/Lexer/Matcher/Matcher.php b/src/Language/Lexer/Matcher/Matcher.php index 88e8d69..e6c66f5 100644 --- a/src/Language/Lexer/Matcher/Matcher.php +++ b/src/Language/Lexer/Matcher/Matcher.php @@ -28,160 +28,160 @@ use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Not\Not; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Optional\Optional; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Sequence\Sequence; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; abstract class Matcher { /** * @var array */ - private static $instancesByTokenType = []; + private static $instancesByRule = []; - final public static function for(TokenType $tokenType): self + final public static function for(Rule $tokenType): self { - return self::$instancesByTokenType[$tokenType->value] ??= match ($tokenType) { - TokenType::COMMENT => + return self::$instancesByRule[$tokenType->value] ??= match ($tokenType) { + 
Rule::COMMENT => new Sequence( new Exact('#'), new Optional(new Not(new Exact("\n"))) ), - TokenType::KEYWORD_FROM => + Rule::KEYWORD_FROM => new Exact('from'), - TokenType::KEYWORD_IMPORT => + Rule::KEYWORD_IMPORT => new Exact('import'), - TokenType::KEYWORD_EXPORT => + Rule::KEYWORD_EXPORT => new Exact('export'), - TokenType::KEYWORD_ENUM => + Rule::KEYWORD_ENUM => new Exact('enum'), - TokenType::KEYWORD_STRUCT => + Rule::KEYWORD_STRUCT => new Exact('struct'), - TokenType::KEYWORD_COMPONENT => + Rule::KEYWORD_COMPONENT => new Exact('component'), - TokenType::KEYWORD_MATCH => + Rule::KEYWORD_MATCH => new Exact('match'), - TokenType::KEYWORD_DEFAULT => + Rule::KEYWORD_DEFAULT => new Exact('default'), - TokenType::KEYWORD_RETURN => + Rule::KEYWORD_RETURN => new Exact('return'), - TokenType::KEYWORD_TRUE => + Rule::KEYWORD_TRUE => new Exact('true'), - TokenType::KEYWORD_FALSE => + Rule::KEYWORD_FALSE => new Exact('false'), - TokenType::KEYWORD_NULL => + Rule::KEYWORD_NULL => new Exact('null'), - TokenType::STRING_LITERAL_DELIMITER => + Rule::STRING_LITERAL_DELIMITER => new Exact('"'), - TokenType::STRING_LITERAL_CONTENT => + Rule::STRING_LITERAL_CONTENT => new Not(new Characters('"\\')), - TokenType::INTEGER_BINARY => + Rule::INTEGER_BINARY => new Sequence(new Exact('0b'), new Characters('01')), - TokenType::INTEGER_OCTAL => + Rule::INTEGER_OCTAL => new Sequence(new Exact('0o'), new Characters('01234567')), - TokenType::INTEGER_DECIMAL => + Rule::INTEGER_DECIMAL => new Characters('0123456789', 'box'), - TokenType::INTEGER_HEXADECIMAL => + Rule::INTEGER_HEXADECIMAL => new Sequence(new Exact('0x'), new Characters('0123456789ABCDEF')), - TokenType::TEMPLATE_LITERAL_DELIMITER => + Rule::TEMPLATE_LITERAL_DELIMITER => new Exact('"""'), - TokenType::TEMPLATE_LITERAL_CONTENT => + Rule::TEMPLATE_LITERAL_CONTENT => new Not(new Characters('{}\\' . 
"\n")), - TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER => + Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER => new Sequence( new Exact('\\'), new Fixed(1, new Characters('nrtvef\\$"')) ), - TokenType::ESCAPE_SEQUENCE_HEXADECIMAL => + Rule::ESCAPE_SEQUENCE_HEXADECIMAL => new Sequence( new Exact('\\x'), new Fixed(2, new Characters('abcdefABCDEF0123456789')) ), - TokenType::ESCAPE_SEQUENCE_UNICODE => + Rule::ESCAPE_SEQUENCE_UNICODE => new Sequence( new Exact('\\u'), new Fixed(4, new Characters('abcdefABCDEF0123456789')) ), - TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT => + Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT => new Sequence( new Exact('\\u{'), new Characters('abcdefABCDEF0123456789'), new Exact('}') ), - TokenType::BRACKET_CURLY_OPEN => + Rule::BRACKET_CURLY_OPEN => new Exact('{'), - TokenType::BRACKET_CURLY_CLOSE => + Rule::BRACKET_CURLY_CLOSE => new Exact('}'), - TokenType::BRACKET_ROUND_OPEN => + Rule::BRACKET_ROUND_OPEN => new Exact('('), - TokenType::BRACKET_ROUND_CLOSE => + Rule::BRACKET_ROUND_CLOSE => new Exact(')'), - TokenType::BRACKET_SQUARE_OPEN => + Rule::BRACKET_SQUARE_OPEN => new Exact('['), - TokenType::BRACKET_SQUARE_CLOSE => + Rule::BRACKET_SQUARE_CLOSE => new Exact(']'), - TokenType::BRACKET_ANGLE_OPEN => + Rule::BRACKET_ANGLE_OPEN => new Exact('<'), - TokenType::BRACKET_ANGLE_CLOSE => + Rule::BRACKET_ANGLE_CLOSE => new Exact('>'), - TokenType::SYMBOL_COLON => + Rule::SYMBOL_COLON => new Exact(':'), - TokenType::SYMBOL_PERIOD => + Rule::SYMBOL_PERIOD => new Exact('.'), - TokenType::SYMBOL_QUESTIONMARK => + Rule::SYMBOL_QUESTIONMARK => new Exact('?'), - TokenType::SYMBOL_EXCLAMATIONMARK => + Rule::SYMBOL_EXCLAMATIONMARK => new Exact('!'), - TokenType::SYMBOL_COMMA => + Rule::SYMBOL_COMMA => new Exact(','), - TokenType::SYMBOL_DASH => + Rule::SYMBOL_DASH => new Exact('-'), - TokenType::SYMBOL_EQUALS => + Rule::SYMBOL_EQUALS => new Exact('='), - TokenType::SYMBOL_SLASH_FORWARD => + Rule::SYMBOL_SLASH_FORWARD => new Exact('/'), - TokenType::SYMBOL_PIPE => + 
Rule::SYMBOL_PIPE => new Exact('|'), - TokenType::SYMBOL_BOOLEAN_AND => + Rule::SYMBOL_BOOLEAN_AND => new Exact('&&'), - TokenType::SYMBOL_BOOLEAN_OR => + Rule::SYMBOL_BOOLEAN_OR => new Exact('||'), - TokenType::SYMBOL_STRICT_EQUALS => + Rule::SYMBOL_STRICT_EQUALS => new Exact('==='), - TokenType::SYMBOL_NOT_EQUALS => + Rule::SYMBOL_NOT_EQUALS => new Exact('!=='), - TokenType::SYMBOL_GREATER_THAN => + Rule::SYMBOL_GREATER_THAN => new Exact('>'), - TokenType::SYMBOL_GREATER_THAN_OR_EQUAL => + Rule::SYMBOL_GREATER_THAN_OR_EQUAL => new Exact('>='), - TokenType::SYMBOL_LESS_THAN => + Rule::SYMBOL_LESS_THAN => new Exact('<'), - TokenType::SYMBOL_LESS_THAN_OR_EQUAL => + Rule::SYMBOL_LESS_THAN_OR_EQUAL => new Exact('<='), - TokenType::SYMBOL_ARROW_SINGLE => + Rule::SYMBOL_ARROW_SINGLE => new Exact('->'), - TokenType::SYMBOL_OPTCHAIN => + Rule::SYMBOL_OPTCHAIN => new Exact('?.'), - TokenType::SYMBOL_NULLISH_COALESCE => + Rule::SYMBOL_NULLISH_COALESCE => new Exact('??'), - TokenType::SYMBOL_CLOSE_TAG => + Rule::SYMBOL_CLOSE_TAG => new Exact(' + Rule::WORD => new Characters( 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' ), - TokenType::TEXT => + Rule::TEXT => new Not(new Characters('<{}>' . 
" \t\n")), - TokenType::SPACE => + Rule::SPACE => new Characters(" \t"), - TokenType::END_OF_LINE => + Rule::END_OF_LINE => new Exact("\n") }; } diff --git a/src/Language/Lexer/Token/TokenType.php b/src/Language/Lexer/Rule/Rule.php similarity index 97% rename from src/Language/Lexer/Token/TokenType.php rename to src/Language/Lexer/Rule/Rule.php index 3fdbd7e..1f67a0b 100644 --- a/src/Language/Lexer/Token/TokenType.php +++ b/src/Language/Lexer/Rule/Rule.php @@ -20,9 +20,9 @@ declare(strict_types=1); -namespace PackageFactory\ComponentEngine\Language\Lexer\Token; +namespace PackageFactory\ComponentEngine\Language\Lexer\Rule; -enum TokenType: string +enum Rule: string { case COMMENT = 'COMMENT'; diff --git a/src/Language/Lexer/Token/TokenTypes.php b/src/Language/Lexer/Rule/Rules.php similarity index 81% rename from src/Language/Lexer/Token/TokenTypes.php rename to src/Language/Lexer/Rule/Rules.php index 60b2322..947e2ed 100644 --- a/src/Language/Lexer/Token/TokenTypes.php +++ b/src/Language/Lexer/Rule/Rules.php @@ -20,23 +20,23 @@ declare(strict_types=1); -namespace PackageFactory\ComponentEngine\Language\Lexer\Token; +namespace PackageFactory\ComponentEngine\Language\Lexer\Rule; -final class TokenTypes +final class Rules { /** - * @var TokenType[] + * @var Rule[] */ public readonly array $items; - private function __construct(TokenType ...$items) + private function __construct(Rule ...$items) { assert(count($items) > 0); $this->items = $items; } - public static function from(TokenType ...$items): self + public static function from(Rule ...$items): self { $items = array_unique($items, SORT_REGULAR); $items = array_values($items); @@ -44,7 +44,7 @@ public static function from(TokenType ...$items): self return new self(...$items); } - public function contains(TokenType $needle): bool + public function contains(Rule $needle): bool { return in_array($needle, $this->items); } diff --git a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php 
b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php index b2ac8ad..bbf39d7 100644 --- a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php +++ b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php @@ -25,20 +25,20 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\BooleanLiteral\BooleanLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; final class BooleanLiteralParser { use Singleton; - private static TokenTypes $TOKEN_TYPES_BOOLEAN_KEYWORDS; + private static Rules $TOKEN_TYPES_BOOLEAN_KEYWORDS; private function __construct() { - self::$TOKEN_TYPES_BOOLEAN_KEYWORDS ??= TokenTypes::from( - TokenType::KEYWORD_TRUE, - TokenType::KEYWORD_FALSE + self::$TOKEN_TYPES_BOOLEAN_KEYWORDS ??= Rules::from( + Rule::KEYWORD_TRUE, + Rule::KEYWORD_FALSE ); } @@ -48,7 +48,7 @@ public function parse(Lexer $lexer): BooleanLiteralNode return new BooleanLiteralNode( rangeInSource: $lexer->getCursorRange(), - value: $lexer->getTokenTypeUnderCursor() === TokenType::KEYWORD_TRUE + value: $lexer->getRuleUnderCursor() === Rule::KEYWORD_TRUE ); } } diff --git a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php index 2d0d909..acbe828 100644 --- a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php +++ b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php @@ -29,8 +29,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Expression\ExpressionNode; use PackageFactory\ComponentEngine\Language\AST\Node\PropertyDeclaration\PropertyDeclarationNodes; use 
PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Language\Parser\PropertyDeclaration\PropertyDeclarationParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -39,22 +39,22 @@ final class ComponentDeclarationParser { use Singleton; - private static TokenTypes $TOKEN_TYPES_SPACE; + private static Rules $TOKEN_TYPES_SPACE; private ?PropertyDeclarationParser $propertyDeclarationParser = null; private ?ExpressionParser $returnParser = null; private function __construct() { - self::$TOKEN_TYPES_SPACE ??= TokenTypes::from( - TokenType::SPACE, - TokenType::END_OF_LINE + self::$TOKEN_TYPES_SPACE ??= Rules::from( + Rule::SPACE, + Rule::END_OF_LINE ); } public function parse(Lexer $lexer): ComponentDeclarationNode { - $lexer->read(TokenType::KEYWORD_COMPONENT); + $lexer->read(Rule::KEYWORD_COMPONENT); $start = $lexer->getStartPosition(); $lexer->skipSpace(); @@ -62,7 +62,7 @@ public function parse(Lexer $lexer): ComponentDeclarationNode $props = $this->parseProps($lexer); $return = $this->parseReturn($lexer); - $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $lexer->read(Rule::BRACKET_CURLY_CLOSE); $end = $lexer->getEndPosition(); return new ComponentDeclarationNode( @@ -75,7 +75,7 @@ public function parse(Lexer $lexer): ComponentDeclarationNode private function parseName(Lexer $lexer): ComponentNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); $componentNameNode = new ComponentNameNode( rangeInSource: $lexer->getCursorRange(), value: ComponentName::from($lexer->getBuffer()) @@ -90,12 +90,12 @@ private function parseProps(Lexer $lexer): PropertyDeclarationNodes { 
$this->propertyDeclarationParser ??= PropertyDeclarationParser::singleton(); - $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->read(Rule::BRACKET_CURLY_OPEN); $lexer->skipSpaceAndComments(); $items = []; - while (!$lexer->peek(TokenType::KEYWORD_RETURN)) { - $lexer->expect(TokenType::WORD); + while (!$lexer->peek(Rule::KEYWORD_RETURN)) { + $lexer->expect(Rule::WORD); $items[] = $this->propertyDeclarationParser->parse($lexer); $lexer->skipSpaceAndComments(); } @@ -107,7 +107,7 @@ private function parseReturn(Lexer $lexer): ExpressionNode { $this->returnParser ??= new ExpressionParser(); - $lexer->read(TokenType::KEYWORD_RETURN); + $lexer->read(Rule::KEYWORD_RETURN); $lexer->readOneOf(self::$TOKEN_TYPES_SPACE); $lexer->skipSpaceAndComments(); diff --git a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php index 57ca71f..70750b9 100644 --- a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php +++ b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php @@ -34,8 +34,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -44,25 +44,25 @@ final class EnumDeclarationParser { use Singleton; - private static TokenTypes $TOKEN_TYPES_ENUM_MEMBER_VALUE_START; + private static Rules 
$TOKEN_TYPES_ENUM_MEMBER_VALUE_START; private ?StringLiteralParser $stringLiteralParser = null; private ?IntegerLiteralParser $integerLiteralParser = null; private function __construct() { - self::$TOKEN_TYPES_ENUM_MEMBER_VALUE_START ??= TokenTypes::from( - TokenType::STRING_LITERAL_DELIMITER, - TokenType::INTEGER_BINARY, - TokenType::INTEGER_OCTAL, - TokenType::INTEGER_DECIMAL, - TokenType::INTEGER_HEXADECIMAL + self::$TOKEN_TYPES_ENUM_MEMBER_VALUE_START ??= Rules::from( + Rule::STRING_LITERAL_DELIMITER, + Rule::INTEGER_BINARY, + Rule::INTEGER_OCTAL, + Rule::INTEGER_DECIMAL, + Rule::INTEGER_HEXADECIMAL ); } public function parse(Lexer $lexer): EnumDeclarationNode { - $lexer->read(TokenType::KEYWORD_ENUM); + $lexer->read(Rule::KEYWORD_ENUM); $start = $lexer->getStartPosition(); $lexer->skipSpace(); @@ -80,7 +80,7 @@ public function parse(Lexer $lexer): EnumDeclarationNode private function parseEnumName(Lexer $lexer): EnumNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); $enumNameNode = new EnumNameNode( rangeInSource: $lexer->getCursorRange(), value: EnumName::from($lexer->getBuffer()) @@ -92,15 +92,15 @@ private function parseEnumName(Lexer $lexer): EnumNameNode private function parseEnumMemberDeclarations(Lexer $lexer): EnumMemberDeclarationNodes { - $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->read(Rule::BRACKET_CURLY_OPEN); $lexer->skipSpaceAndComments(); $items = []; - while (!$lexer->peek(TokenType::BRACKET_CURLY_CLOSE)) { + while (!$lexer->peek(Rule::BRACKET_CURLY_CLOSE)) { $items[] = $this->parseEnumMemberDeclaration($lexer); } - $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $lexer->read(Rule::BRACKET_CURLY_CLOSE); return new EnumMemberDeclarationNodes(...$items); } @@ -125,7 +125,7 @@ private function parseEnumMemberDeclaration(Lexer $lexer): EnumMemberDeclaration private function parseEnumMemberName(Lexer $lexer): EnumMemberNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); return new 
EnumMemberNameNode( rangeInSource: $lexer->getCursorRange(), @@ -135,17 +135,17 @@ private function parseEnumMemberName(Lexer $lexer): EnumMemberNameNode private function parseEnumMemberValue(Lexer $lexer): ?EnumMemberValueNode { - if ($lexer->probe(TokenType::BRACKET_ROUND_OPEN)) { + if ($lexer->probe(Rule::BRACKET_ROUND_OPEN)) { $start = $lexer->getStartPosition(); $value = match ($lexer->expectOneOf(self::$TOKEN_TYPES_ENUM_MEMBER_VALUE_START)) { - TokenType::STRING_LITERAL_DELIMITER => + Rule::STRING_LITERAL_DELIMITER => $this->parseStringLiteral($lexer), default => $this->parseIntegerLiteral($lexer) }; - $lexer->read(TokenType::BRACKET_ROUND_CLOSE); + $lexer->read(Rule::BRACKET_ROUND_CLOSE); $end = $lexer->getEndPosition(); return new EnumMemberValueNode( diff --git a/src/Language/Parser/Export/ExportParser.php b/src/Language/Parser/Export/ExportParser.php index ae14bf4..645349a 100644 --- a/src/Language/Parser/Export/ExportParser.php +++ b/src/Language/Parser/Export/ExportParser.php @@ -30,8 +30,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructDeclarationNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\ComponentDeclaration\ComponentDeclarationParser; use PackageFactory\ComponentEngine\Language\Parser\EnumDeclaration\EnumDeclarationParser; use PackageFactory\ComponentEngine\Language\Parser\StructDeclaration\StructDeclarationParser; @@ -41,7 +41,7 @@ final class ExportParser { use Singleton; - private static TokenTypes $TOKEN_TYPES_DECLARATION_KEYWORDS; + private static Rules $TOKEN_TYPES_DECLARATION_KEYWORDS; private 
?ComponentDeclarationParser $componentDeclarationParser = null; private ?EnumDeclarationParser $enumDeclarationParser = null; @@ -49,25 +49,25 @@ final class ExportParser private function __construct() { - self::$TOKEN_TYPES_DECLARATION_KEYWORDS ??= TokenTypes::from( - TokenType::KEYWORD_COMPONENT, - TokenType::KEYWORD_ENUM, - TokenType::KEYWORD_STRUCT + self::$TOKEN_TYPES_DECLARATION_KEYWORDS ??= Rules::from( + Rule::KEYWORD_COMPONENT, + Rule::KEYWORD_ENUM, + Rule::KEYWORD_STRUCT ); } public function parse(Lexer $lexer): ExportNode { try { - $lexer->read(TokenType::KEYWORD_EXPORT); + $lexer->read(Rule::KEYWORD_EXPORT); $start = $lexer->getStartPosition(); $lexer->skipSpace(); $declaration = match ($lexer->expectOneOf(self::$TOKEN_TYPES_DECLARATION_KEYWORDS)) { - TokenType::KEYWORD_COMPONENT => $this->parseComponentDeclaration($lexer), - TokenType::KEYWORD_ENUM => $this->parseEnumDeclaration($lexer), - TokenType::KEYWORD_STRUCT => $this->parseStructDeclaration($lexer), + Rule::KEYWORD_COMPONENT => $this->parseComponentDeclaration($lexer), + Rule::KEYWORD_ENUM => $this->parseEnumDeclaration($lexer), + Rule::KEYWORD_STRUCT => $this->parseStructDeclaration($lexer), default => throw new LogicException() }; diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index abf6e57..da4d53e 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -34,8 +34,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\UnaryOperation\UnaryOperationNode; use PackageFactory\ComponentEngine\Language\AST\Node\UnaryOperation\UnaryOperator; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use 
PackageFactory\ComponentEngine\Language\Parser\BooleanLiteral\BooleanLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\Match\MatchParser; @@ -48,10 +48,10 @@ final class ExpressionParser { - private static TokenTypes $TOKEN_TYPES_ACCESS; - private static TokenTypes $TOKEN_TYPES_BINARY_OPERATORS; - private static TokenTypes $TOKEN_TYPES_UNARY; - private static TokenTypes $TOKEN_TYPES_CLOSING_DELIMITERS; + private static Rules $TOKEN_TYPES_ACCESS; + private static Rules $TOKEN_TYPES_BINARY_OPERATORS; + private static Rules $TOKEN_TYPES_UNARY; + private static Rules $TOKEN_TYPES_CLOSING_DELIMITERS; private ?BooleanLiteralParser $booleanLiteralParser = null; private ?IntegerLiteralParser $integerLiteralParser = null; @@ -65,41 +65,41 @@ final class ExpressionParser public function __construct( private Precedence $precedence = Precedence::SEQUENCE ) { - self::$TOKEN_TYPES_ACCESS ??= TokenTypes::from( - TokenType::SYMBOL_PERIOD, - TokenType::SYMBOL_OPTCHAIN + self::$TOKEN_TYPES_ACCESS ??= Rules::from( + Rule::SYMBOL_PERIOD, + Rule::SYMBOL_OPTCHAIN ); - self::$TOKEN_TYPES_BINARY_OPERATORS ??= TokenTypes::from( - TokenType::SYMBOL_NULLISH_COALESCE, - TokenType::SYMBOL_BOOLEAN_AND, - TokenType::SYMBOL_BOOLEAN_OR, - TokenType::SYMBOL_STRICT_EQUALS, - TokenType::SYMBOL_NOT_EQUALS, - TokenType::SYMBOL_GREATER_THAN, - TokenType::SYMBOL_LESS_THAN + self::$TOKEN_TYPES_BINARY_OPERATORS ??= Rules::from( + Rule::SYMBOL_NULLISH_COALESCE, + Rule::SYMBOL_BOOLEAN_AND, + Rule::SYMBOL_BOOLEAN_OR, + Rule::SYMBOL_STRICT_EQUALS, + Rule::SYMBOL_NOT_EQUALS, + Rule::SYMBOL_GREATER_THAN, + Rule::SYMBOL_LESS_THAN ); - self::$TOKEN_TYPES_UNARY ??= TokenTypes::from( - TokenType::SYMBOL_EXCLAMATIONMARK, - TokenType::KEYWORD_TRUE, - TokenType::KEYWORD_FALSE, - TokenType::KEYWORD_NULL, - TokenType::KEYWORD_MATCH, - TokenType::STRING_LITERAL_DELIMITER, - TokenType::INTEGER_HEXADECIMAL, - 
TokenType::INTEGER_DECIMAL, - TokenType::INTEGER_OCTAL, - TokenType::INTEGER_BINARY, - TokenType::WORD, - TokenType::BRACKET_ANGLE_OPEN, - TokenType::BRACKET_ROUND_OPEN + self::$TOKEN_TYPES_UNARY ??= Rules::from( + Rule::SYMBOL_EXCLAMATIONMARK, + Rule::KEYWORD_TRUE, + Rule::KEYWORD_FALSE, + Rule::KEYWORD_NULL, + Rule::KEYWORD_MATCH, + Rule::STRING_LITERAL_DELIMITER, + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + Rule::INTEGER_OCTAL, + Rule::INTEGER_BINARY, + Rule::WORD, + Rule::BRACKET_ANGLE_OPEN, + Rule::BRACKET_ROUND_OPEN ); - self::$TOKEN_TYPES_CLOSING_DELIMITERS = TokenTypes::from( - TokenType::BRACKET_CURLY_OPEN, - TokenType::BRACKET_CURLY_CLOSE, - TokenType::BRACKET_ROUND_CLOSE, - TokenType::SYMBOL_COLON, - TokenType::SYMBOL_COMMA, - TokenType::SYMBOL_ARROW_SINGLE + self::$TOKEN_TYPES_CLOSING_DELIMITERS = Rules::from( + Rule::BRACKET_CURLY_OPEN, + Rule::BRACKET_CURLY_CLOSE, + Rule::BRACKET_ROUND_CLOSE, + Rule::SYMBOL_COLON, + Rule::SYMBOL_COMMA, + Rule::SYMBOL_ARROW_SINGLE ); } @@ -119,8 +119,8 @@ public function parse(Lexer $lexer): ExpressionNode continue; } - if ($lexer->peek(TokenType::SYMBOL_QUESTIONMARK)) { - if ($this->precedence->mustStopAt(TokenType::SYMBOL_QUESTIONMARK)) { + if ($lexer->peek(Rule::SYMBOL_QUESTIONMARK)) { + if ($this->precedence->mustStopAt(Rule::SYMBOL_QUESTIONMARK)) { return $result; } @@ -145,31 +145,31 @@ public function parse(Lexer $lexer): ExpressionNode private function parseUnaryStatement(Lexer $lexer): ExpressionNode { - if ($lexer->peek(TokenType::TEMPLATE_LITERAL_DELIMITER)) { + if ($lexer->peek(Rule::TEMPLATE_LITERAL_DELIMITER)) { $result = $this->parseTemplateLiteral($lexer); } else { $result = match ($lexer->expectOneOf(self::$TOKEN_TYPES_UNARY)) { - TokenType::SYMBOL_EXCLAMATIONMARK => + Rule::SYMBOL_EXCLAMATIONMARK => $this->parseUnaryOperation($lexer), - TokenType::KEYWORD_TRUE, - TokenType::KEYWORD_FALSE => + Rule::KEYWORD_TRUE, + Rule::KEYWORD_FALSE => $this->parseBooleanLiteral($lexer), - 
TokenType::KEYWORD_NULL => + Rule::KEYWORD_NULL => $this->parseNullLiteral($lexer), - TokenType::STRING_LITERAL_DELIMITER => + Rule::STRING_LITERAL_DELIMITER => $this->parseStringLiteral($lexer), - TokenType::INTEGER_HEXADECIMAL, - TokenType::INTEGER_DECIMAL, - TokenType::INTEGER_OCTAL, - TokenType::INTEGER_BINARY => + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + Rule::INTEGER_OCTAL, + Rule::INTEGER_BINARY => $this->parseIntegerLiteral($lexer), - TokenType::WORD => + Rule::WORD => $this->parseValueReference($lexer), - TokenType::BRACKET_ANGLE_OPEN => + Rule::BRACKET_ANGLE_OPEN => $this->parseTag($lexer), - TokenType::KEYWORD_MATCH => + Rule::KEYWORD_MATCH => $this->parseMatch($lexer), - TokenType::BRACKET_ROUND_OPEN => + Rule::BRACKET_ROUND_OPEN => $this->parseBracketedExpression($lexer), default => throw new LogicException() }; @@ -204,7 +204,7 @@ private function parseUnaryOperation(Lexer $lexer): ExpressionNode private function parseUnaryOperator(Lexer $lexer): UnaryOperator { - $lexer->read(TokenType::SYMBOL_EXCLAMATIONMARK); + $lexer->read(Rule::SYMBOL_EXCLAMATIONMARK); $unaryOperator = UnaryOperator::NOT; @@ -318,13 +318,13 @@ private function parseMatch(Lexer $lexer): ExpressionNode private function parseBracketedExpression(Lexer $lexer): ExpressionNode { - $lexer->read(TokenType::BRACKET_ROUND_OPEN); + $lexer->read(Rule::BRACKET_ROUND_OPEN); $start = $lexer->getStartPosition(); $lexer->skipSpaceAndComments(); $innerExpressionNode = $this->parse($lexer); - $lexer->read(TokenType::BRACKET_ROUND_CLOSE); + $lexer->read(Rule::BRACKET_ROUND_CLOSE); $end = $lexer->getEndPosition(); $lexer->skipSpaceAndComments(); @@ -339,7 +339,7 @@ private function parseAcccess(Lexer $lexer, ExpressionNode $parent): ExpressionN while (!$lexer->isEnd()) { $type = $this->parseAccessType($lexer); - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); $accessNode = new AccessNode( rangeInSource: $parent->rangeInSource->start->toRange( $lexer->getEndPosition() @@ -369,9 
+369,9 @@ private function parseAcccess(Lexer $lexer, ExpressionNode $parent): ExpressionN private function parseAccessType(Lexer $lexer): AccessType { - return match ($lexer->getTokenTypeUnderCursor()) { - TokenType::SYMBOL_PERIOD => AccessType::MANDATORY, - TokenType::SYMBOL_OPTCHAIN => AccessType::OPTIONAL, + return match ($lexer->getRuleUnderCursor()) { + Rule::SYMBOL_PERIOD => AccessType::MANDATORY, + Rule::SYMBOL_OPTCHAIN => AccessType::OPTIONAL, default => throw new LogicException() }; } @@ -400,25 +400,25 @@ private function parseBinaryOperation(Lexer $lexer, ExpressionNode $leftOperand) private function parseBinaryOperator(Lexer $lexer): BinaryOperator { - if ($lexer->probe(TokenType::SYMBOL_GREATER_THAN_OR_EQUAL)) { + if ($lexer->probe(Rule::SYMBOL_GREATER_THAN_OR_EQUAL)) { $lexer->skipSpaceAndComments(); return BinaryOperator::GREATER_THAN_OR_EQUAL; } - if ($lexer->probe(TokenType::SYMBOL_LESS_THAN_OR_EQUAL)) { + if ($lexer->probe(Rule::SYMBOL_LESS_THAN_OR_EQUAL)) { $lexer->skipSpaceAndComments(); return BinaryOperator::LESS_THAN_OR_EQUAL; } $lexer->readOneOf(self::$TOKEN_TYPES_BINARY_OPERATORS); - $operator = match ($lexer->getTokenTypeUnderCursor()) { - TokenType::SYMBOL_NULLISH_COALESCE => BinaryOperator::NULLISH_COALESCE, - TokenType::SYMBOL_BOOLEAN_AND => BinaryOperator::AND, - TokenType::SYMBOL_BOOLEAN_OR => BinaryOperator::OR, - TokenType::SYMBOL_STRICT_EQUALS => BinaryOperator::EQUAL, - TokenType::SYMBOL_NOT_EQUALS => BinaryOperator::NOT_EQUAL, - TokenType::SYMBOL_GREATER_THAN => BinaryOperator::GREATER_THAN, - TokenType::SYMBOL_LESS_THAN => BinaryOperator::LESS_THAN, + $operator = match ($lexer->getRuleUnderCursor()) { + Rule::SYMBOL_NULLISH_COALESCE => BinaryOperator::NULLISH_COALESCE, + Rule::SYMBOL_BOOLEAN_AND => BinaryOperator::AND, + Rule::SYMBOL_BOOLEAN_OR => BinaryOperator::OR, + Rule::SYMBOL_STRICT_EQUALS => BinaryOperator::EQUAL, + Rule::SYMBOL_NOT_EQUALS => BinaryOperator::NOT_EQUAL, + Rule::SYMBOL_GREATER_THAN => 
BinaryOperator::GREATER_THAN, + Rule::SYMBOL_LESS_THAN => BinaryOperator::LESS_THAN, default => throw new LogicException() }; @@ -429,12 +429,12 @@ private function parseBinaryOperator(Lexer $lexer): BinaryOperator private function parseTernaryOperation(Lexer $lexer, ExpressionNode $condition): ExpressionNode { - $lexer->read(TokenType::SYMBOL_QUESTIONMARK); + $lexer->read(Rule::SYMBOL_QUESTIONMARK); $lexer->skipSpaceAndComments(); $trueBranch = $this->parse($lexer); - $lexer->read(TokenType::SYMBOL_COLON); + $lexer->read(Rule::SYMBOL_COLON); $lexer->skipSpaceAndComments(); $falseBranch = $this->parse($lexer); diff --git a/src/Language/Parser/Expression/Precedence.php b/src/Language/Parser/Expression/Precedence.php index 06c0b17..b13a217 100644 --- a/src/Language/Parser/Expression/Precedence.php +++ b/src/Language/Parser/Expression/Precedence.php @@ -23,7 +23,7 @@ namespace PackageFactory\ComponentEngine\Language\Parser\Expression; use PackageFactory\ComponentEngine\Language\AST\Node\BinaryOperation\BinaryOperator; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; enum Precedence: int { @@ -40,33 +40,33 @@ enum Precedence: int case TERNARY = 3; case SEQUENCE = 1; - public static function forTokenType(TokenType $tokenType): self + public static function forRule(Rule $tokenType): self { return match ($tokenType) { - TokenType::BRACKET_ROUND_OPEN, - TokenType::BRACKET_ROUND_CLOSE, - TokenType::BRACKET_SQUARE_OPEN, - TokenType::BRACKET_SQUARE_CLOSE, - TokenType::SYMBOL_OPTCHAIN, - TokenType::SYMBOL_PERIOD => self::ACCESS, + Rule::BRACKET_ROUND_OPEN, + Rule::BRACKET_ROUND_CLOSE, + Rule::BRACKET_SQUARE_OPEN, + Rule::BRACKET_SQUARE_CLOSE, + Rule::SYMBOL_OPTCHAIN, + Rule::SYMBOL_PERIOD => self::ACCESS, - TokenType::SYMBOL_EXCLAMATIONMARK => self::UNARY, + Rule::SYMBOL_EXCLAMATIONMARK => self::UNARY, - TokenType::SYMBOL_GREATER_THAN, - TokenType::SYMBOL_GREATER_THAN_OR_EQUAL, - 
TokenType::SYMBOL_LESS_THAN, - TokenType::SYMBOL_LESS_THAN_OR_EQUAL => self::COMPARISON, + Rule::SYMBOL_GREATER_THAN, + Rule::SYMBOL_GREATER_THAN_OR_EQUAL, + Rule::SYMBOL_LESS_THAN, + Rule::SYMBOL_LESS_THAN_OR_EQUAL => self::COMPARISON, - TokenType::SYMBOL_STRICT_EQUALS, - TokenType::SYMBOL_NOT_EQUALS => self::EQUALITY, + Rule::SYMBOL_STRICT_EQUALS, + Rule::SYMBOL_NOT_EQUALS => self::EQUALITY, - TokenType::SYMBOL_BOOLEAN_AND => self::LOGICAL_AND, + Rule::SYMBOL_BOOLEAN_AND => self::LOGICAL_AND, - TokenType::SYMBOL_NULLISH_COALESCE, - TokenType::SYMBOL_BOOLEAN_OR => self::LOGICAL_OR, + Rule::SYMBOL_NULLISH_COALESCE, + Rule::SYMBOL_BOOLEAN_OR => self::LOGICAL_OR, - TokenType::SYMBOL_QUESTIONMARK, - TokenType::SYMBOL_COLON => self::TERNARY, + Rule::SYMBOL_QUESTIONMARK, + Rule::SYMBOL_COLON => self::TERNARY, default => self::SEQUENCE }; @@ -90,8 +90,8 @@ public static function forBinaryOperator(BinaryOperator $binaryOperator): self }; } - public function mustStopAt(TokenType $tokenType): bool + public function mustStopAt(Rule $tokenType): bool { - return self::forTokenType($tokenType)->value <= $this->value; + return self::forRule($tokenType)->value <= $this->value; } } diff --git a/src/Language/Parser/Import/ImportParser.php b/src/Language/Parser/Import/ImportParser.php index 1a09067..be28219 100644 --- a/src/Language/Parser/Import/ImportParser.php +++ b/src/Language/Parser/Import/ImportParser.php @@ -31,9 +31,9 @@ use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\Token; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Token; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use 
PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -41,29 +41,29 @@ final class ImportParser { use Singleton; - private static TokenTypes $TOKEN_TYPES_NAME_BOUNDARIES; + private static Rules $TOKEN_TYPES_NAME_BOUNDARIES; private ?StringLiteralParser $pathParser = null; private function __construct() { - self::$TOKEN_TYPES_NAME_BOUNDARIES ??= TokenTypes::from( - TokenType::WORD, - TokenType::SYMBOL_COMMA, - TokenType::BRACKET_CURLY_CLOSE + self::$TOKEN_TYPES_NAME_BOUNDARIES ??= Rules::from( + Rule::WORD, + Rule::SYMBOL_COMMA, + Rule::BRACKET_CURLY_CLOSE ); } public function parse(Lexer $lexer): ImportNode { try { - $lexer->read(TokenType::KEYWORD_FROM); + $lexer->read(Rule::KEYWORD_FROM); $start = $lexer->getStartPosition(); $lexer->skipSpace(); $path = $this->parsePath($lexer); - $lexer->read(TokenType::KEYWORD_IMPORT); + $lexer->read(Rule::KEYWORD_IMPORT); $lexer->skipSpace(); $names = $this->parseNames($lexer); @@ -91,27 +91,27 @@ private function parsePath(Lexer $lexer): StringLiteralNode private function parseNames(Lexer $lexer): ImportedNameNodes { - $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->read(Rule::BRACKET_CURLY_OPEN); $start = $lexer->getStartPosition(); $lexer->skipSpaceAndComments(); $nameNodes = []; - while (!$lexer->peek(TokenType::BRACKET_CURLY_CLOSE)) { - $lexer->read(TokenType::WORD); + while (!$lexer->peek(Rule::BRACKET_CURLY_CLOSE)) { + $lexer->read(Rule::WORD); $nameNodes[] = new ImportedNameNode( rangeInSource: $lexer->getCursorRange(), value: VariableName::from($lexer->getBuffer()) ); $lexer->skipSpaceAndComments(); - if ($lexer->probe(TokenType::SYMBOL_COMMA)) { + if ($lexer->probe(Rule::SYMBOL_COMMA)) { $lexer->skipSpaceAndComments(); } else { break; } } - $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $lexer->read(Rule::BRACKET_CURLY_CLOSE); $end = $lexer->getEndPosition(); try 
{ @@ -126,7 +126,7 @@ private function parseNames(Lexer $lexer): ImportedNameNodes public function parseName(Lexer $lexer): ImportedNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); return new ImportedNameNode( rangeInSource: $lexer->getCursorRange(), diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index 0799763..5cb1dd2 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -28,8 +28,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Util\DebugHelper; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -37,15 +37,15 @@ final class IntegerLiteralParser { use Singleton; - private static TokenTypes $INTEGER_TOKEN_TYPES; + private static Rules $INTEGER_TOKEN_TYPES; private function __construct() { - self::$INTEGER_TOKEN_TYPES ??= TokenTypes::from( - TokenType::INTEGER_HEXADECIMAL, - TokenType::INTEGER_DECIMAL, - TokenType::INTEGER_OCTAL, - TokenType::INTEGER_BINARY + self::$INTEGER_TOKEN_TYPES ??= Rules::from( + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + Rule::INTEGER_OCTAL, + Rule::INTEGER_BINARY ); } @@ -56,7 +56,7 @@ public function parse(Lexer $lexer): IntegerLiteralNode return new IntegerLiteralNode( rangeInSource: $lexer->getCursorRange(), - format: $this->getIntegerFormatFromToken($lexer->getTokenTypeUnderCursor()), + format: $this->getIntegerFormatFromToken($lexer->getRuleUnderCursor()), 
value: $lexer->getBuffer() ); } catch (LexerException $e) { @@ -64,18 +64,18 @@ public function parse(Lexer $lexer): IntegerLiteralNode } } - private function getIntegerFormatFromToken(TokenType $tokenType): IntegerFormat + private function getIntegerFormatFromToken(Rule $tokenType): IntegerFormat { return match ($tokenType) { - TokenType::INTEGER_BINARY => IntegerFormat::BINARY, - TokenType::INTEGER_OCTAL => IntegerFormat::OCTAL, - TokenType::INTEGER_DECIMAL => IntegerFormat::DECIMAL, - TokenType::INTEGER_HEXADECIMAL => IntegerFormat::HEXADECIMAL, + Rule::INTEGER_BINARY => IntegerFormat::BINARY, + Rule::INTEGER_OCTAL => IntegerFormat::OCTAL, + Rule::INTEGER_DECIMAL => IntegerFormat::DECIMAL, + Rule::INTEGER_HEXADECIMAL => IntegerFormat::HEXADECIMAL, default => throw new LogicException( sprintf( 'Expected %s to be one of %s', $tokenType->value, - DebugHelper::describeTokenTypes($this->INTEGER_TOKEN_TYPES) + DebugHelper::describeRules($this->INTEGER_TOKEN_TYPES) ) ) }; diff --git a/src/Language/Parser/Match/MatchParser.php b/src/Language/Parser/Match/MatchParser.php index d9a2a84..8754d69 100644 --- a/src/Language/Parser/Match/MatchParser.php +++ b/src/Language/Parser/Match/MatchParser.php @@ -30,7 +30,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Match\MatchArmNodes; use PackageFactory\ComponentEngine\Language\AST\Node\Match\MatchNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -44,7 +44,7 @@ final class MatchParser public function parse(Lexer $lexer): MatchNode { - $lexer->read(TokenType::KEYWORD_MATCH); + $lexer->read(Rule::KEYWORD_MATCH); $start = $lexer->getStartPosition(); $lexer->skipSpace(); @@ -68,18 +68,18 @@ private function parseSubject(Lexer $lexer): 
ExpressionNode private function parseArms(Lexer $lexer): MatchArmNodes { - $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->read(Rule::BRACKET_CURLY_OPEN); $start = $lexer->getStartPosition(); $items = []; - while (!$lexer->peek(TokenType::BRACKET_CURLY_CLOSE)) { + while (!$lexer->peek(Rule::BRACKET_CURLY_CLOSE)) { $lexer->skipSpaceAndComments(); $items[] = $this->parseArm($lexer); } $lexer->skipSpaceAndComments(); - $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $lexer->read(Rule::BRACKET_CURLY_CLOSE); $end = $lexer->getEndPosition(); try { @@ -99,7 +99,7 @@ private function parseArm(Lexer $lexer): MatchArmNode $lexer->getStartPosition(); $lexer->skipSpaceAndComments(); - $lexer->read(TokenType::SYMBOL_ARROW_SINGLE); + $lexer->read(Rule::SYMBOL_ARROW_SINGLE); $lexer->skipSpaceAndComments(); $right = $this->parseArmRight($lexer); @@ -114,7 +114,7 @@ private function parseArm(Lexer $lexer): MatchArmNode private function parseArmLeft(Lexer $lexer): ?ExpressionNodes { - if ($lexer->probe(TokenType::KEYWORD_DEFAULT)) { + if ($lexer->probe(Rule::KEYWORD_DEFAULT)) { return null; } @@ -125,7 +125,7 @@ private function parseArmLeft(Lexer $lexer): ?ExpressionNodes $lexer->skipSpaceAndComments(); $items[] = $this->matchArmLeftParser->parse($lexer); $lexer->skipSpaceAndComments(); - } while ($lexer->probe(TokenType::SYMBOL_COMMA)); + } while ($lexer->probe(Rule::SYMBOL_COMMA)); $lexer->skipSpaceAndComments(); diff --git a/src/Language/Parser/Module/ModuleParser.php b/src/Language/Parser/Module/ModuleParser.php index eb6da24..7ed9205 100644 --- a/src/Language/Parser/Module/ModuleParser.php +++ b/src/Language/Parser/Module/ModuleParser.php @@ -29,7 +29,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Module\ModuleNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use 
PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Language\Parser\Export\ExportParser; use PackageFactory\ComponentEngine\Language\Parser\Import\ImportParser; use PackageFactory\ComponentEngine\Parser\Source\Position; @@ -69,7 +69,7 @@ public function parse(Lexer $lexer): ModuleNode private function parseImports(Lexer $lexer): ImportNodes { $items = []; - while ($lexer->peek(TokenType::KEYWORD_FROM)) { + while ($lexer->peek(Rule::KEYWORD_FROM)) { $items[] = $this->parseImport($lexer); } diff --git a/src/Language/Parser/NullLiteral/NullLiteralParser.php b/src/Language/Parser/NullLiteral/NullLiteralParser.php index 8ea36d8..5373cc4 100644 --- a/src/Language/Parser/NullLiteral/NullLiteralParser.php +++ b/src/Language/Parser/NullLiteral/NullLiteralParser.php @@ -25,7 +25,7 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\NullLiteral\NullLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; final class NullLiteralParser { @@ -33,7 +33,7 @@ final class NullLiteralParser public function parse(Lexer $lexer): NullLiteralNode { - $lexer->read(TokenType::KEYWORD_NULL); + $lexer->read(Rule::KEYWORD_NULL); return new NullLiteralNode( rangeInSource: $lexer->getCursorRange() diff --git a/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php b/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php index 50646a2..0cbed2e 100644 --- a/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php +++ b/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php @@ -27,7 +27,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\PropertyDeclaration\PropertyDeclarationNode; use PackageFactory\ComponentEngine\Language\AST\Node\PropertyDeclaration\PropertyNameNode; use 
PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Language\Parser\TypeReference\TypeReferenceParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -41,7 +41,7 @@ public function parse(Lexer $lexer): PropertyDeclarationNode { $name = $this->parsePropertyName($lexer); - $lexer->read(TokenType::SYMBOL_COLON); + $lexer->read(Rule::SYMBOL_COLON); $lexer->skipSpace(); $this->typeReferenceParser ??= TypeReferenceParser::singleton(); @@ -59,7 +59,7 @@ public function parse(Lexer $lexer): PropertyDeclarationNode public function parsePropertyName(Lexer $lexer): PropertyNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); return new PropertyNameNode( rangeInSource: $lexer->getCursorRange(), diff --git a/src/Language/Parser/StringLiteral/StringLiteralParser.php b/src/Language/Parser/StringLiteral/StringLiteralParser.php index 701166d..72d8998 100644 --- a/src/Language/Parser/StringLiteral/StringLiteralParser.php +++ b/src/Language/Parser/StringLiteral/StringLiteralParser.php @@ -25,7 +25,7 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Parser\Source\Range; final class StringLiteralParser @@ -34,23 +34,23 @@ final class StringLiteralParser public function parse(Lexer $lexer): StringLiteralNode { - $lexer->read(TokenType::STRING_LITERAL_DELIMITER); + $lexer->read(Rule::STRING_LITERAL_DELIMITER); $start = $lexer->getStartPosition(); $value = ''; - while (!$lexer->peek(TokenType::STRING_LITERAL_DELIMITER)) { - if 
($lexer->probe(TokenType::STRING_LITERAL_CONTENT)) { + while (!$lexer->peek(Rule::STRING_LITERAL_DELIMITER)) { + if ($lexer->probe(Rule::STRING_LITERAL_CONTENT)) { $value = $lexer->getBuffer(); } - if ($lexer->probe(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER)) { + if ($lexer->probe(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER)) { $value = $lexer->getBuffer(); } break; } - $lexer->read(TokenType::STRING_LITERAL_DELIMITER); + $lexer->read(Rule::STRING_LITERAL_DELIMITER); $end = $lexer->getEndPosition(); return new StringLiteralNode( diff --git a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php index fa1aea6..319e0a6 100644 --- a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php +++ b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php @@ -28,7 +28,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructDeclarationNode; use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructNameNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Language\Parser\PropertyDeclaration\PropertyDeclarationParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -40,7 +40,7 @@ final class StructDeclarationParser public function parse(Lexer $lexer): StructDeclarationNode { - $lexer->read(TokenType::KEYWORD_STRUCT); + $lexer->read(Rule::KEYWORD_STRUCT); $start = $lexer->getStartPosition(); $lexer->skipSpace(); @@ -57,7 +57,7 @@ public function parse(Lexer $lexer): StructDeclarationNode private function parseStructName(Lexer $lexer): StructNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); $structNameNode = new StructNameNode( rangeInSource: $lexer->getCursorRange(), value: StructName::from($lexer->getBuffer()) @@ -72,12 +72,12 @@ 
public function parsePropertyDeclarations(Lexer $lexer): PropertyDeclarationNode { $this->propertyDeclarationParser ??= PropertyDeclarationParser::singleton(); - $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->read(Rule::BRACKET_CURLY_OPEN); $lexer->skipSpaceAndComments(); $items = []; - while (!$lexer->probe(TokenType::BRACKET_CURLY_CLOSE)) { - $lexer->expect(TokenType::WORD); + while (!$lexer->probe(Rule::BRACKET_CURLY_CLOSE)) { + $lexer->expect(Rule::WORD); $items[] = $this->propertyDeclarationParser->parse($lexer); $lexer->skipSpaceAndComments(); } diff --git a/src/Language/Parser/Tag/TagParser.php b/src/Language/Parser/Tag/TagParser.php index 4e5f823..002e696 100644 --- a/src/Language/Parser/Tag/TagParser.php +++ b/src/Language/Parser/Tag/TagParser.php @@ -36,8 +36,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Tag\TagNode; use PackageFactory\ComponentEngine\Language\AST\Node\Text\TextNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\Text\TextParser; @@ -47,7 +47,7 @@ final class TagParser { use Singleton; - private static TokenTypes $TOKEN_TYPES_ATTRIBUTE_DELIMITERS; + private static Rules $TOKEN_TYPES_ATTRIBUTE_DELIMITERS; private ?StringLiteralParser $stringLiteralParser = null; private ?TextParser $textParser = null; @@ -55,22 +55,22 @@ final class TagParser private function __construct() { - self::$TOKEN_TYPES_ATTRIBUTE_DELIMITERS ??= TokenTypes::from( - TokenType::STRING_LITERAL_DELIMITER, - TokenType::BRACKET_CURLY_OPEN + 
self::$TOKEN_TYPES_ATTRIBUTE_DELIMITERS ??= Rules::from( + Rule::STRING_LITERAL_DELIMITER, + Rule::BRACKET_CURLY_OPEN ); } public function parse(Lexer $lexer): TagNode { - $lexer->read(TokenType::BRACKET_ANGLE_OPEN); + $lexer->read(Rule::BRACKET_ANGLE_OPEN); $start = $lexer->getStartPosition(); $name = $this->parseName($lexer); $attributes = $this->parseAttributes($lexer); - if ($lexer->probe(TokenType::SYMBOL_SLASH_FORWARD)) { - $lexer->read(TokenType::BRACKET_ANGLE_CLOSE); + if ($lexer->probe(Rule::SYMBOL_SLASH_FORWARD)) { + $lexer->read(Rule::BRACKET_ANGLE_CLOSE); $end = $lexer->getEndPosition(); return new TagNode( @@ -82,7 +82,7 @@ public function parse(Lexer $lexer): TagNode ); } - $lexer->read(TokenType::BRACKET_ANGLE_CLOSE); + $lexer->read(Rule::BRACKET_ANGLE_CLOSE); $children = $this->parseChildren($lexer); $this->readClosingTagName($lexer, $name->value); @@ -99,7 +99,7 @@ public function parse(Lexer $lexer): TagNode private function parseName(Lexer $lexer): TagNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); $tagNameNode = new TagNameNode( rangeInSource: Range::from( $lexer->getStartPosition(), @@ -116,7 +116,7 @@ private function parseName(Lexer $lexer): TagNameNode private function parseAttributes(Lexer $lexer): AttributeNodes { $items = []; - while ($lexer->peek(TokenType::WORD)) { + while ($lexer->peek(Rule::WORD)) { $items[] = $this->parseAttribute($lexer); $lexer->skipSpace(); } @@ -142,7 +142,7 @@ private function parseAttribute(Lexer $lexer): AttributeNode private function parseAttributeName(Lexer $lexer): AttributeNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); return new AttributeNameNode( rangeInSource: $lexer->getCursorRange(), @@ -152,11 +152,11 @@ private function parseAttributeName(Lexer $lexer): AttributeNameNode private function parseAttributeValue(Lexer $lexer): null|StringLiteralNode|ExpressionNode { - if ($lexer->probe(TokenType::SYMBOL_EQUALS)) { + if ($lexer->probe(Rule::SYMBOL_EQUALS)) 
{ return match ($lexer->expectOneOf(self::$TOKEN_TYPES_ATTRIBUTE_DELIMITERS)) { - TokenType::STRING_LITERAL_DELIMITER => + Rule::STRING_LITERAL_DELIMITER => $this->parseString($lexer), - TokenType::BRACKET_CURLY_OPEN => + Rule::BRACKET_CURLY_OPEN => $this->parseExpression($lexer), default => throw new LogicException() }; @@ -175,11 +175,11 @@ private function parseExpression(Lexer $lexer): ExpressionNode { $this->expressionParser ??= new ExpressionParser(); - $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->read(Rule::BRACKET_CURLY_OPEN); $expressionNode = $this->expressionParser->parse($lexer); - $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $lexer->read(Rule::BRACKET_CURLY_CLOSE); return $expressionNode; } @@ -189,16 +189,16 @@ private function parseChildren(Lexer $lexer): ChildNodes $items = []; $preserveLeadingSpace = false; - while (!$lexer->peek(TokenType::SYMBOL_CLOSE_TAG)) { - if ($lexer->peek(TokenType::BRACKET_ANGLE_OPEN)) { + while (!$lexer->peek(Rule::SYMBOL_CLOSE_TAG)) { + if ($lexer->peek(Rule::BRACKET_ANGLE_OPEN)) { $items[] = $this->parse($lexer); - $preserveLeadingSpace = !$lexer->peek(TokenType::END_OF_LINE); + $preserveLeadingSpace = !$lexer->peek(Rule::END_OF_LINE); continue; } - if ($lexer->peek(TokenType::BRACKET_CURLY_OPEN)) { + if ($lexer->peek(Rule::BRACKET_CURLY_OPEN)) { $items[] = $this->parseExpression($lexer); - $preserveLeadingSpace = !$lexer->peek(TokenType::END_OF_LINE); + $preserveLeadingSpace = !$lexer->peek(Rule::END_OF_LINE); continue; } @@ -218,13 +218,13 @@ private function parseText(Lexer $lexer, bool $preserveLeadingSpace): ?TextNode private function readClosingTagName(Lexer $lexer, TagName $expectedName): void { - $lexer->read(TokenType::SYMBOL_CLOSE_TAG); + $lexer->read(Rule::SYMBOL_CLOSE_TAG); $start = $lexer->getStartPosition(); - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); $closingName = $lexer->getBuffer(); - $lexer->read(TokenType::BRACKET_ANGLE_CLOSE); + $lexer->read(Rule::BRACKET_ANGLE_CLOSE); 
$end = $lexer->getEndPosition(); if ($closingName !== $expectedName->value) { diff --git a/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php b/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php index 9757763..eb4c7cf 100644 --- a/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php +++ b/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php @@ -30,7 +30,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralSegments; use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralStringSegmentNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -42,12 +42,12 @@ final class TemplateLiteralParser public function parse(Lexer $lexer): TemplateLiteralNode { - $lexer->read(TokenType::TEMPLATE_LITERAL_DELIMITER); + $lexer->read(Rule::TEMPLATE_LITERAL_DELIMITER); $start = $lexer->getStartPosition(); $lines = $this->parseLines($lexer); - $lexer->read(TokenType::TEMPLATE_LITERAL_DELIMITER); + $lexer->read(Rule::TEMPLATE_LITERAL_DELIMITER); $end = $lexer->getEndPosition(); return new TemplateLiteralNode( @@ -59,14 +59,14 @@ public function parse(Lexer $lexer): TemplateLiteralNode public function parseLines(Lexer $lexer): TemplateLiteralLines { - $lexer->read(TokenType::END_OF_LINE); - $lexer->probe(TokenType::SPACE); + $lexer->read(Rule::END_OF_LINE); + $lexer->probe(Rule::SPACE); $items = []; - while (!$lexer->peek(TokenType::TEMPLATE_LITERAL_DELIMITER)) { + while (!$lexer->peek(Rule::TEMPLATE_LITERAL_DELIMITER)) { $items[] = $this->parseLine($lexer); - $lexer->read(TokenType::END_OF_LINE); - $lexer->probe(TokenType::SPACE); + $lexer->read(Rule::END_OF_LINE); + $lexer->probe(Rule::SPACE); } return new 
TemplateLiteralLines(...$items); @@ -86,8 +86,8 @@ public function parseLine(Lexer $lexer): TemplateLiteralLine public function parseSegments(Lexer $lexer): TemplateLiteralSegments { $items = []; - while (!$lexer->peek(TokenType::END_OF_LINE)) { - if ($lexer->peek(TokenType::BRACKET_CURLY_OPEN)) { + while (!$lexer->peek(Rule::END_OF_LINE)) { + if ($lexer->peek(Rule::BRACKET_CURLY_OPEN)) { $items[] = $this->parseExpressionSegment($lexer); continue; } @@ -99,7 +99,7 @@ public function parseSegments(Lexer $lexer): TemplateLiteralSegments public function parseStringSegment(Lexer $lexer): TemplateLiteralStringSegmentNode { - $lexer->read(TokenType::TEMPLATE_LITERAL_CONTENT); + $lexer->read(Rule::TEMPLATE_LITERAL_CONTENT); return new TemplateLiteralStringSegmentNode( rangeInSource: $lexer->getCursorRange(), @@ -111,14 +111,14 @@ public function parseExpressionSegment(Lexer $lexer): TemplateLiteralExpressionS { $this->expressionParser ??= new ExpressionParser(); - $lexer->read(TokenType::BRACKET_CURLY_OPEN); + $lexer->read(Rule::BRACKET_CURLY_OPEN); $start = $lexer->getStartPosition(); $lexer->skipSpaceAndComments(); $expression = $this->expressionParser->parse($lexer); $lexer->skipSpaceAndComments(); - $lexer->read(TokenType::BRACKET_CURLY_CLOSE); + $lexer->read(Rule::BRACKET_CURLY_CLOSE); $end = $lexer->getEndPosition(); return new TemplateLiteralExpressionSegmentNode( diff --git a/src/Language/Parser/Text/TextParser.php b/src/Language/Parser/Text/TextParser.php index e405eab..6f5a896 100644 --- a/src/Language/Parser/Text/TextParser.php +++ b/src/Language/Parser/Text/TextParser.php @@ -25,8 +25,8 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\Text\TextNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use 
PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -34,20 +34,20 @@ final class TextParser { use Singleton; - private static TokenTypes $TOKEN_TYPES_END_DELIMITERS; - private static TokenTypes $TOKEN_TYPES_CONTENT; + private static Rules $TOKEN_TYPES_END_DELIMITERS; + private static Rules $TOKEN_TYPES_CONTENT; private function __construct() { - self::$TOKEN_TYPES_END_DELIMITERS = TokenTypes::from( - TokenType::SYMBOL_CLOSE_TAG, - TokenType::BRACKET_ANGLE_OPEN, - TokenType::BRACKET_CURLY_OPEN + self::$TOKEN_TYPES_END_DELIMITERS = Rules::from( + Rule::SYMBOL_CLOSE_TAG, + Rule::BRACKET_ANGLE_OPEN, + Rule::BRACKET_CURLY_OPEN ); - self::$TOKEN_TYPES_CONTENT = TokenTypes::from( - TokenType::SPACE, - TokenType::END_OF_LINE, - TokenType::TEXT + self::$TOKEN_TYPES_CONTENT = Rules::from( + Rule::SPACE, + Rule::END_OF_LINE, + Rule::TEXT ); } @@ -57,12 +57,12 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo $start = null; $hasLeadingSpace = false; - if ($lexer->probe(TokenType::SPACE)) { + if ($lexer->probe(Rule::SPACE)) { $start = $lexer->getStartPosition(); $hasLeadingSpace = true; } - if ($lexer->probe(TokenType::END_OF_LINE)) { + if ($lexer->probe(Rule::END_OF_LINE)) { $start ??= $lexer->getStartPosition(); $hasLeadingSpace = false; } @@ -78,7 +78,7 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo while (!$lexer->isEnd() && !$lexer->peekOneOf(self::$TOKEN_TYPES_END_DELIMITERS)) { $lexer->readOneOf(self::$TOKEN_TYPES_CONTENT); - if ($lexer->getTokenTypeUnderCursor() === TokenType::TEXT) { + if ($lexer->getRuleUnderCursor() === Rule::TEXT) { $start ??= $lexer->getStartPosition(); if ($hasTrailingSpace) { $value .= ' '; @@ -89,7 +89,7 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo 
continue; } - if ($lexer->getTokenTypeUnderCursor() === TokenType::END_OF_LINE) { + if ($lexer->getRuleUnderCursor() === Rule::END_OF_LINE) { $trailingSpaceContainsLineBreaks = true; } @@ -102,7 +102,7 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo $end = $lexer->getEndPosition(); - if ($hasTrailingSpace && !$trailingSpaceContainsLineBreaks && !$lexer->isEnd() && !$lexer->peek(TokenType::SYMBOL_CLOSE_TAG)) { + if ($hasTrailingSpace && !$trailingSpaceContainsLineBreaks && !$lexer->isEnd() && !$lexer->peek(Rule::SYMBOL_CLOSE_TAG)) { $value .= ' '; } diff --git a/src/Language/Parser/TypeReference/TypeReferenceParser.php b/src/Language/Parser/TypeReference/TypeReferenceParser.php index a1075fb..38d91eb 100644 --- a/src/Language/Parser/TypeReference/TypeReferenceParser.php +++ b/src/Language/Parser/TypeReference/TypeReferenceParser.php @@ -30,7 +30,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeNameNodes; use PackageFactory\ComponentEngine\Language\AST\Node\TypeReference\TypeReferenceNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -43,7 +43,7 @@ final class TypeReferenceParser public function parse(Lexer $lexer): TypeReferenceNode { $this->start = null; - $isOptional = $lexer->probe(TokenType::SYMBOL_QUESTIONMARK); + $isOptional = $lexer->probe(Rule::SYMBOL_QUESTIONMARK); $this->start = $lexer->getStartPosition(); $typeNameNodes = $this->parseTypeNames($lexer); $isArray = $this->parseIsArray($lexer); @@ -67,7 +67,7 @@ public function parseTypeNames(Lexer $lexer): TypeNameNodes while (true) { $items[] = $this->parseTypeName($lexer); - if ($lexer->isEnd() || !$lexer->probe(TokenType::SYMBOL_PIPE)) { + if ($lexer->isEnd() || 
!$lexer->probe(Rule::SYMBOL_PIPE)) { break; } } @@ -81,7 +81,7 @@ public function parseTypeNames(Lexer $lexer): TypeNameNodes public function parseTypeName(Lexer $lexer): TypeNameNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); $this->start ??= $lexer->getStartPosition(); return new TypeNameNode( @@ -96,8 +96,8 @@ public function parseIsArray(Lexer $lexer): bool return false; } - if ($lexer->probe(TokenType::BRACKET_SQUARE_OPEN)) { - $lexer->read(TokenType::BRACKET_SQUARE_CLOSE); + if ($lexer->probe(Rule::BRACKET_SQUARE_OPEN)) { + $lexer->read(Rule::BRACKET_SQUARE_CLOSE); return true; } diff --git a/src/Language/Parser/ValueReference/ValueReferenceParser.php b/src/Language/Parser/ValueReference/ValueReferenceParser.php index 49aef07..0c5b15e 100644 --- a/src/Language/Parser/ValueReference/ValueReferenceParser.php +++ b/src/Language/Parser/ValueReference/ValueReferenceParser.php @@ -26,7 +26,7 @@ use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; final class ValueReferenceParser { @@ -34,7 +34,7 @@ final class ValueReferenceParser public function parse(Lexer $lexer): ValueReferenceNode { - $lexer->read(TokenType::WORD); + $lexer->read(Rule::WORD); return new ValueReferenceNode( rangeInSource: $lexer->getCursorRange(), diff --git a/src/Language/Util/DebugHelper.php b/src/Language/Util/DebugHelper.php index 05c3a45..46b8caa 100644 --- a/src/Language/Util/DebugHelper.php +++ b/src/Language/Util/DebugHelper.php @@ -33,97 +33,97 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral\TemplateLiteralStringSegmentNode; use PackageFactory\ComponentEngine\Language\AST\Node\TernaryOperation\TernaryOperationNode; use 
PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; final class DebugHelper { - public static function describeTokenType(TokenType $tokenType): string + public static function describeRule(Rule $tokenType): string { return $tokenType->value . match ($tokenType) { - TokenType::COMMENT => ' (e.g. "# ...")', - - TokenType::KEYWORD_FROM => ' ("from")', - TokenType::KEYWORD_IMPORT => ' ("import")', - TokenType::KEYWORD_EXPORT => ' ("export")', - TokenType::KEYWORD_ENUM => ' ("enum")', - TokenType::KEYWORD_STRUCT => ' ("struct")', - TokenType::KEYWORD_COMPONENT => ' ("component")', - TokenType::KEYWORD_MATCH => ' ("match")', - TokenType::KEYWORD_DEFAULT => ' ("default")', - TokenType::KEYWORD_RETURN => ' ("return")', - TokenType::KEYWORD_TRUE => ' ("true")', - TokenType::KEYWORD_FALSE => ' ("false")', - TokenType::KEYWORD_NULL => ' ("null")', - - TokenType::STRING_LITERAL_DELIMITER => ' (""")', - TokenType::STRING_LITERAL_CONTENT => '', - - TokenType::INTEGER_BINARY => ' (e.g. "0b1001")', - TokenType::INTEGER_OCTAL => ' (e.g. "0o644")', - TokenType::INTEGER_DECIMAL => ' (e.g. "42")', - TokenType::INTEGER_HEXADECIMAL => ' (e.g. "0xABC")', - - TokenType::TEMPLATE_LITERAL_DELIMITER => ' (""""")', - TokenType::TEMPLATE_LITERAL_CONTENT => '', - - TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER => ' (e.g. "\\\\" or "\\n")', - TokenType::ESCAPE_SEQUENCE_HEXADECIMAL => ' (e.g. "\\xA9")', - TokenType::ESCAPE_SEQUENCE_UNICODE => ' (e.g. "\\u00A9")', - TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT => ' (e.g.
"\\u{2F804}")', - - TokenType::BRACKET_CURLY_OPEN => ' ("{")', - TokenType::BRACKET_CURLY_CLOSE => ' ("}")', - TokenType::BRACKET_ROUND_OPEN => ' ("(")', - TokenType::BRACKET_ROUND_CLOSE => ' (")")', - TokenType::BRACKET_SQUARE_OPEN => ' ("[")', - TokenType::BRACKET_SQUARE_CLOSE => ' ("]")', - TokenType::BRACKET_ANGLE_OPEN => ' ("<")', - TokenType::BRACKET_ANGLE_CLOSE => ' (">")', - - TokenType::SYMBOL_PERIOD => ' (".")', - TokenType::SYMBOL_COLON => ' (":")', - TokenType::SYMBOL_QUESTIONMARK => ' ("?")', - TokenType::SYMBOL_EXCLAMATIONMARK => ' ("!")', - TokenType::SYMBOL_COMMA => ' (",")', - TokenType::SYMBOL_DASH => ' ("-")', - TokenType::SYMBOL_EQUALS => ' ("=")', - TokenType::SYMBOL_SLASH_FORWARD => ' ("/")', - TokenType::SYMBOL_PIPE => ' ("|")', - TokenType::SYMBOL_BOOLEAN_AND => ' ("&&")', - TokenType::SYMBOL_BOOLEAN_OR => ' ("||")', - TokenType::SYMBOL_STRICT_EQUALS => ' ("===")', - TokenType::SYMBOL_NOT_EQUALS => ' ("!==")', - TokenType::SYMBOL_GREATER_THAN => ' (">")', - TokenType::SYMBOL_GREATER_THAN_OR_EQUAL => ' (">=")', - TokenType::SYMBOL_LESS_THAN => ' ("<")', - TokenType::SYMBOL_LESS_THAN_OR_EQUAL => ' ("<=")', - TokenType::SYMBOL_ARROW_SINGLE => ' ("->")', - TokenType::SYMBOL_OPTCHAIN => ' ("?.")', - TokenType::SYMBOL_NULLISH_COALESCE => ' ("??")', - TokenType::SYMBOL_CLOSE_TAG => ' ("</")', - - TokenType::WORD => '', - TokenType::TEXT => '', - - TokenType::SPACE => '', - TokenType::END_OF_LINE => '' + Rule::COMMENT => ' (e.g.
"# ...")', + + Rule::KEYWORD_FROM => ' ("from")', + Rule::KEYWORD_IMPORT => ' ("import")', + Rule::KEYWORD_EXPORT => ' ("export")', + Rule::KEYWORD_ENUM => ' ("enum")', + Rule::KEYWORD_STRUCT => ' ("struct")', + Rule::KEYWORD_COMPONENT => ' ("component")', + Rule::KEYWORD_MATCH => ' ("match")', + Rule::KEYWORD_DEFAULT => ' ("default")', + Rule::KEYWORD_RETURN => ' ("return")', + Rule::KEYWORD_TRUE => ' ("true")', + Rule::KEYWORD_FALSE => ' ("false")', + Rule::KEYWORD_NULL => ' ("null")', + + Rule::STRING_LITERAL_DELIMITER => ' (""")', + Rule::STRING_LITERAL_CONTENT => '', + + Rule::INTEGER_BINARY => ' (e.g. "0b1001")', + Rule::INTEGER_OCTAL => ' (e.g. "0o644")', + Rule::INTEGER_DECIMAL => ' (e.g. "42")', + Rule::INTEGER_HEXADECIMAL => ' (e.g. "0xABC")', + + Rule::TEMPLATE_LITERAL_DELIMITER => ' (""""")', + Rule::TEMPLATE_LITERAL_CONTENT => '', + + Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER => ' (e.g. "\\\\" or "\\n")', + Rule::ESCAPE_SEQUENCE_HEXADECIMAL => ' (e.g. "\\xA9")', + Rule::ESCAPE_SEQUENCE_UNICODE => ' (e.g. "\\u00A9")', + Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT => ' (e.g.
"\\u{2F804}")', + + Rule::BRACKET_CURLY_OPEN => ' ("{")', + Rule::BRACKET_CURLY_CLOSE => ' ("}")', + Rule::BRACKET_ROUND_OPEN => ' ("(")', + Rule::BRACKET_ROUND_CLOSE => ' (")")', + Rule::BRACKET_SQUARE_OPEN => ' ("[")', + Rule::BRACKET_SQUARE_CLOSE => ' ("]")', + Rule::BRACKET_ANGLE_OPEN => ' ("<")', + Rule::BRACKET_ANGLE_CLOSE => ' (">")', + + Rule::SYMBOL_PERIOD => ' (".")', + Rule::SYMBOL_COLON => ' (":")', + Rule::SYMBOL_QUESTIONMARK => ' ("?")', + Rule::SYMBOL_EXCLAMATIONMARK => ' ("!")', + Rule::SYMBOL_COMMA => ' (",")', + Rule::SYMBOL_DASH => ' ("-")', + Rule::SYMBOL_EQUALS => ' ("=")', + Rule::SYMBOL_SLASH_FORWARD => ' ("/")', + Rule::SYMBOL_PIPE => ' ("|")', + Rule::SYMBOL_BOOLEAN_AND => ' ("&&")', + Rule::SYMBOL_BOOLEAN_OR => ' ("||")', + Rule::SYMBOL_STRICT_EQUALS => ' ("===")', + Rule::SYMBOL_NOT_EQUALS => ' ("!==")', + Rule::SYMBOL_GREATER_THAN => ' (">")', + Rule::SYMBOL_GREATER_THAN_OR_EQUAL => ' (">=")', + Rule::SYMBOL_LESS_THAN => ' ("<")', + Rule::SYMBOL_LESS_THAN_OR_EQUAL => ' ("<=")', + Rule::SYMBOL_ARROW_SINGLE => ' ("->")', + Rule::SYMBOL_OPTCHAIN => ' ("?.")', + Rule::SYMBOL_NULLISH_COALESCE => ' ("??")', + Rule::SYMBOL_CLOSE_TAG => ' ("</")', + + Rule::WORD => '', + Rule::TEXT => '', + + Rule::SPACE => '', + Rule::END_OF_LINE => '' }; } - public static function describeTokenTypes(TokenTypes $tokenTypes): string + public static function describeRules(Rules $tokenTypes): string { if (count($tokenTypes->items) === 1) { - return self::describeTokenType($tokenTypes->items[0]); + return self::describeRule($tokenTypes->items[0]); } $leadingItems = array_slice($tokenTypes->items, 0, -1); $trailingItem = array_slice($tokenTypes->items, -1)[0]; return join(', ', array_map( - static fn (TokenType $tokenType) => self::describeTokenType($tokenType), + static fn (Rule $tokenType) => self::describeRule($tokenType), $leadingItems - )) . ' or ' . self::describeTokenType($trailingItem); + )) . ' or ' .
self::describeRule($trailingItem); } public static function printASTNode(Node $node, string $indentation = ''): string diff --git a/test/Unit/Language/Lexer/LexerTest.php b/test/Unit/Language/Lexer/LexerTest.php index 9bb0cb1..51f2587 100644 --- a/test/Unit/Language/Lexer/LexerTest.php +++ b/test/Unit/Language/Lexer/LexerTest.php @@ -24,8 +24,8 @@ use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; use PHPUnit\Framework\TestCase; @@ -37,7 +37,7 @@ final class LexerTest extends TestCase protected function assertLexerState( Position $startPosition, Position $endPosition, - TokenType $tokenTypeUnderCursor, + Rule $tokenTypeUnderCursor, string $buffer, bool $isEnd ): void { @@ -55,7 +55,7 @@ protected function assertLexerState( $this->assertEquals( $tokenTypeUnderCursor, - $this->lexer->getTokenTypeUnderCursor(), + $this->lexer->getRuleUnderCursor(), 'Failed asserting that token type under cursor of lexer equals' ); @@ -90,170 +90,170 @@ protected function assertThrowsLexerException(callable $fn, LexerException $expe public static function singleTokenExamples(): iterable { yield ($source = '#') => - [$source, TokenType::COMMENT]; + [$source, Rule::COMMENT]; yield ($source = '# This is a comment') => - [$source, TokenType::COMMENT]; + [$source, Rule::COMMENT]; yield ($source = '# 🌵🆚⌚️: Multi-byte characters are not a problem inside a comment.') => - [$source, TokenType::COMMENT]; + [$source, Rule::COMMENT]; yield ($source = 'from') => - [$source, TokenType::KEYWORD_FROM]; + [$source, Rule::KEYWORD_FROM]; yield ($source = 'import') 
=> - [$source, TokenType::KEYWORD_IMPORT]; + [$source, Rule::KEYWORD_IMPORT]; yield ($source = 'export') => - [$source, TokenType::KEYWORD_EXPORT]; + [$source, Rule::KEYWORD_EXPORT]; yield ($source = 'enum') => - [$source, TokenType::KEYWORD_ENUM]; + [$source, Rule::KEYWORD_ENUM]; yield ($source = 'struct') => - [$source, TokenType::KEYWORD_STRUCT]; + [$source, Rule::KEYWORD_STRUCT]; yield ($source = 'component') => - [$source, TokenType::KEYWORD_COMPONENT]; + [$source, Rule::KEYWORD_COMPONENT]; yield ($source = 'match') => - [$source, TokenType::KEYWORD_MATCH]; + [$source, Rule::KEYWORD_MATCH]; yield ($source = 'default') => - [$source, TokenType::KEYWORD_DEFAULT]; + [$source, Rule::KEYWORD_DEFAULT]; yield ($source = 'return') => - [$source, TokenType::KEYWORD_RETURN]; + [$source, Rule::KEYWORD_RETURN]; yield ($source = 'true') => - [$source, TokenType::KEYWORD_TRUE]; + [$source, Rule::KEYWORD_TRUE]; yield ($source = 'false') => - [$source, TokenType::KEYWORD_FALSE]; + [$source, Rule::KEYWORD_FALSE]; yield ($source = 'null') => - [$source, TokenType::KEYWORD_NULL]; + [$source, Rule::KEYWORD_NULL]; yield ($source = '"') => - [$source, TokenType::STRING_LITERAL_DELIMITER]; + [$source, Rule::STRING_LITERAL_DELIMITER]; yield ($source = 'Some string without any escapes') => - [$source, TokenType::STRING_LITERAL_CONTENT]; + [$source, Rule::STRING_LITERAL_CONTENT]; yield ($source = '🌵🆚⌚️: Multi-byte characters are not a problem inside a string.') => - [$source, TokenType::STRING_LITERAL_CONTENT]; + [$source, Rule::STRING_LITERAL_CONTENT]; yield ($source = '0b1001') => - [$source, TokenType::INTEGER_BINARY]; + [$source, Rule::INTEGER_BINARY]; yield ($source = '0o12345670') => - [$source, TokenType::INTEGER_OCTAL]; + [$source, Rule::INTEGER_OCTAL]; yield ($source = '1234567890') => - [$source, TokenType::INTEGER_DECIMAL]; + [$source, Rule::INTEGER_DECIMAL]; yield ($source = '0xABCDEF1234567890') => - [$source, TokenType::INTEGER_HEXADECIMAL]; + [$source, 
Rule::INTEGER_HEXADECIMAL]; yield ($source = '"""') => - [$source, TokenType::TEMPLATE_LITERAL_DELIMITER]; + [$source, Rule::TEMPLATE_LITERAL_DELIMITER]; yield ($source = 'Some string without escapes') => - [$source, TokenType::TEMPLATE_LITERAL_CONTENT]; + [$source, Rule::TEMPLATE_LITERAL_CONTENT]; yield ($source = '🌵🆚⌚️: Multi-byte characters are not a problem inside of template literals.') => - [$source, TokenType::TEMPLATE_LITERAL_CONTENT]; + [$source, Rule::TEMPLATE_LITERAL_CONTENT]; yield ($source = '\\\\') => - [$source, TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; + [$source, Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; yield ($source = '\\n') => - [$source, TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; + [$source, Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; yield ($source = '\\t') => - [$source, TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; + [$source, Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER]; yield ($source = '\\xA9') => - [$source, TokenType::ESCAPE_SEQUENCE_HEXADECIMAL]; + [$source, Rule::ESCAPE_SEQUENCE_HEXADECIMAL]; yield ($source = '\\u00A9') => - [$source, TokenType::ESCAPE_SEQUENCE_UNICODE]; + [$source, Rule::ESCAPE_SEQUENCE_UNICODE]; yield ($source = '\\u{2F804}') => - [$source, TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT]; + [$source, Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT]; yield ($source = '{') => - [$source, TokenType::BRACKET_CURLY_OPEN]; + [$source, Rule::BRACKET_CURLY_OPEN]; yield ($source = '}') => - [$source, TokenType::BRACKET_CURLY_CLOSE]; + [$source, Rule::BRACKET_CURLY_CLOSE]; yield ($source = '(') => - [$source, TokenType::BRACKET_ROUND_OPEN]; + [$source, Rule::BRACKET_ROUND_OPEN]; yield ($source = ')') => - [$source, TokenType::BRACKET_ROUND_CLOSE]; + [$source, Rule::BRACKET_ROUND_CLOSE]; yield ($source = '[') => - [$source, TokenType::BRACKET_SQUARE_OPEN]; + [$source, Rule::BRACKET_SQUARE_OPEN]; yield ($source = ']') => - [$source, TokenType::BRACKET_SQUARE_CLOSE]; + [$source, Rule::BRACKET_SQUARE_CLOSE]; yield ($source = '<') => - 
[$source, TokenType::BRACKET_ANGLE_OPEN]; + [$source, Rule::BRACKET_ANGLE_OPEN]; yield ($source = '>') => - [$source, TokenType::BRACKET_ANGLE_CLOSE]; + [$source, Rule::BRACKET_ANGLE_CLOSE]; yield ($source = '.') => - [$source, TokenType::SYMBOL_PERIOD]; + [$source, Rule::SYMBOL_PERIOD]; yield ($source = ':') => - [$source, TokenType::SYMBOL_COLON]; + [$source, Rule::SYMBOL_COLON]; yield ($source = '?') => - [$source, TokenType::SYMBOL_QUESTIONMARK]; + [$source, Rule::SYMBOL_QUESTIONMARK]; yield ($source = '!') => - [$source, TokenType::SYMBOL_EXCLAMATIONMARK]; + [$source, Rule::SYMBOL_EXCLAMATIONMARK]; yield ($source = ',') => - [$source, TokenType::SYMBOL_COMMA]; + [$source, Rule::SYMBOL_COMMA]; yield ($source = '-') => - [$source, TokenType::SYMBOL_DASH]; + [$source, Rule::SYMBOL_DASH]; yield ($source = '=') => - [$source, TokenType::SYMBOL_EQUALS]; + [$source, Rule::SYMBOL_EQUALS]; yield ($source = '/') => - [$source, TokenType::SYMBOL_SLASH_FORWARD]; + [$source, Rule::SYMBOL_SLASH_FORWARD]; yield ($source = '|') => - [$source, TokenType::SYMBOL_PIPE]; + [$source, Rule::SYMBOL_PIPE]; yield ($source = '&&') => - [$source, TokenType::SYMBOL_BOOLEAN_AND]; + [$source, Rule::SYMBOL_BOOLEAN_AND]; yield ($source = '||') => - [$source, TokenType::SYMBOL_BOOLEAN_OR]; + [$source, Rule::SYMBOL_BOOLEAN_OR]; yield ($source = '===') => - [$source, TokenType::SYMBOL_STRICT_EQUALS]; + [$source, Rule::SYMBOL_STRICT_EQUALS]; yield ($source = '!==') => - [$source, TokenType::SYMBOL_NOT_EQUALS]; + [$source, Rule::SYMBOL_NOT_EQUALS]; yield ($source = '>=') => - [$source, TokenType::SYMBOL_GREATER_THAN_OR_EQUAL]; + [$source, Rule::SYMBOL_GREATER_THAN_OR_EQUAL]; yield ($source = '<=') => - [$source, TokenType::SYMBOL_LESS_THAN_OR_EQUAL]; + [$source, Rule::SYMBOL_LESS_THAN_OR_EQUAL]; yield ($source = '->') => - [$source, TokenType::SYMBOL_ARROW_SINGLE]; + [$source, Rule::SYMBOL_ARROW_SINGLE]; yield ($source = '?.') => - [$source, TokenType::SYMBOL_OPTCHAIN]; + [$source, 
Rule::SYMBOL_OPTCHAIN]; yield ($source = '??') => - [$source, TokenType::SYMBOL_NULLISH_COALESCE]; + [$source, Rule::SYMBOL_NULLISH_COALESCE]; yield ($source = 'ValidWord') => - [$source, TokenType::WORD]; + [$source, Rule::WORD]; yield ($source = 'V4l1dW0rd') => - [$source, TokenType::WORD]; + [$source, Rule::WORD]; yield ($source = '1245ValidWord') => - [$source, TokenType::WORD]; + [$source, Rule::WORD]; yield ($source = 'JustSomeText.TextTerminates-Only-At??Space//Characters.') => - [$source, TokenType::TEXT]; + [$source, Rule::TEXT]; yield ($source = '🌵🆚⌚️') => - [$source, TokenType::TEXT]; + [$source, Rule::TEXT]; yield ($source = ' ') => - [$source, TokenType::SPACE]; + [$source, Rule::SPACE]; yield ($source = ' ') => - [$source, TokenType::SPACE]; + [$source, Rule::SPACE]; yield ($source = "\t") => - [$source, TokenType::SPACE]; + [$source, Rule::SPACE]; yield ($source = "\t\t\t\t") => - [$source, TokenType::SPACE]; + [$source, Rule::SPACE]; yield ($source = " \t \t \t \t ") => - [$source, TokenType::SPACE]; + [$source, Rule::SPACE]; yield ($source = "\n") => - [$source, TokenType::END_OF_LINE]; + [$source, Rule::END_OF_LINE]; } /** * @dataProvider singleTokenExamples * @test * @param string $source - * @param TokenType $expectedTokenType + * @param Rule $expectedRule * @return void */ - public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, TokenType $expectedTokenType): void + public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rule $expectedRule): void { $this->lexer = new Lexer($source); - $this->lexer->read($expectedTokenType); + $this->lexer->read($expectedRule); $this->assertLexerState( startPosition: Position::from(0, 0), endPosition: Position::from(0, \mb_strlen($source) - 1), - tokenTypeUnderCursor: $expectedTokenType, + tokenTypeUnderCursor: $expectedRule, buffer: $source, isEnd: true ); @@ -263,99 +263,99 @@ public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, TokenTyp * @dataProvider 
singleTokenExamples * @test * @param string $source - * @param TokenType $expectedTokenType + * @param Rule $expectedRule * @return void */ - public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, TokenType $expectedTokenType): void + public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rule $expectedRule): void { $this->lexer = new Lexer($source); - $this->lexer->readOneOf(TokenTypes::from($expectedTokenType)); + $this->lexer->readOneOf(Rules::from($expectedRule)); $this->assertLexerState( startPosition: Position::from(0, 0), endPosition: Position::from(0, \mb_strlen($source) - 1), - tokenTypeUnderCursor: $expectedTokenType, + tokenTypeUnderCursor: $expectedRule, buffer: $source, isEnd: true ); } /** - * @return iterable + * @return iterable */ public static function multipleTokensExamples(): iterable { yield ($source = "# This is a comment\n# This is also a comment") => [ $source, - TokenTypes::from(TokenType::COMMENT, TokenType::END_OF_LINE), - [[0, 0], [0, 18], TokenType::COMMENT, '# This is a comment'], - [[0, 19], [0, 19], TokenType::END_OF_LINE, "\n"], - [[1, 0], [1, 23], TokenType::COMMENT, '# This is also a comment'], + Rules::from(Rule::COMMENT, Rule::END_OF_LINE), + [[0, 0], [0, 18], Rule::COMMENT, '# This is a comment'], + [[0, 19], [0, 19], Rule::END_OF_LINE, "\n"], + [[1, 0], [1, 23], Rule::COMMENT, '# This is also a comment'], ]; yield ($source = "1765224, -0xAB89CD, true\nnull") => [ $source, - TokenTypes::from( - TokenType::SYMBOL_DASH, - TokenType::SYMBOL_COMMA, - TokenType::INTEGER_HEXADECIMAL, - TokenType::INTEGER_DECIMAL, - TokenType::SPACE, - TokenType::END_OF_LINE, - TokenType::KEYWORD_TRUE, - TokenType::KEYWORD_NULL + Rules::from( + Rule::SYMBOL_DASH, + Rule::SYMBOL_COMMA, + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + Rule::SPACE, + Rule::END_OF_LINE, + Rule::KEYWORD_TRUE, + Rule::KEYWORD_NULL ), - [[0, 0], [0, 6], TokenType::INTEGER_DECIMAL, '1765224'], - [[0, 7], [0, 7], 
TokenType::SYMBOL_COMMA, ','], - [[0, 8], [0, 8], TokenType::SPACE, ' '], - [[0, 9], [0, 9], TokenType::SYMBOL_DASH, '-'], - [[0, 10], [0, 17], TokenType::INTEGER_HEXADECIMAL, '0xAB89CD'], - [[0, 18], [0, 18], TokenType::SYMBOL_COMMA, ','], - [[0, 19], [0, 19], TokenType::SPACE, ' '], - [[0, 20], [0, 23], TokenType::KEYWORD_TRUE, 'true'], - [[0, 24], [0, 24], TokenType::END_OF_LINE, "\n"], - [[1, 0], [1, 3], TokenType::KEYWORD_NULL, 'null'], + [[0, 0], [0, 6], Rule::INTEGER_DECIMAL, '1765224'], + [[0, 7], [0, 7], Rule::SYMBOL_COMMA, ','], + [[0, 8], [0, 8], Rule::SPACE, ' '], + [[0, 9], [0, 9], Rule::SYMBOL_DASH, '-'], + [[0, 10], [0, 17], Rule::INTEGER_HEXADECIMAL, '0xAB89CD'], + [[0, 18], [0, 18], Rule::SYMBOL_COMMA, ','], + [[0, 19], [0, 19], Rule::SPACE, ' '], + [[0, 20], [0, 23], Rule::KEYWORD_TRUE, 'true'], + [[0, 24], [0, 24], Rule::END_OF_LINE, "\n"], + [[1, 0], [1, 3], Rule::KEYWORD_NULL, 'null'], ]; yield ($source = '0b100101 892837 0xFFAAEE 0o75374') => [ $source, - TokenTypes::from( - TokenType::INTEGER_BINARY, - TokenType::INTEGER_OCTAL, - TokenType::INTEGER_HEXADECIMAL, - TokenType::INTEGER_DECIMAL, - TokenType::SPACE + Rules::from( + Rule::INTEGER_BINARY, + Rule::INTEGER_OCTAL, + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + Rule::SPACE ), - [[0, 0], [0, 7], TokenType::INTEGER_BINARY, '0b100101'], - [[0, 8], [0, 8], TokenType::SPACE, ' '], - [[0, 9], [0, 14], TokenType::INTEGER_DECIMAL, '892837'], - [[0, 15], [0, 15], TokenType::SPACE, ' '], - [[0, 16], [0, 23], TokenType::INTEGER_HEXADECIMAL, '0xFFAAEE'], - [[0, 24], [0, 24], TokenType::SPACE, ' '], - [[0, 25], [0, 31], TokenType::INTEGER_OCTAL, '0o75374'], + [[0, 0], [0, 7], Rule::INTEGER_BINARY, '0b100101'], + [[0, 8], [0, 8], Rule::SPACE, ' '], + [[0, 9], [0, 14], Rule::INTEGER_DECIMAL, '892837'], + [[0, 15], [0, 15], Rule::SPACE, ' '], + [[0, 16], [0, 23], Rule::INTEGER_HEXADECIMAL, '0xFFAAEE'], + [[0, 24], [0, 24], Rule::SPACE, ' '], + [[0, 25], [0, 31], Rule::INTEGER_OCTAL, '0o75374'], 
]; yield ($source = '"This is a string literal with \\n escapes \\xB1 \\u5FA9 \\u{1343E}!"') => [ $source, - TokenTypes::from( - TokenType::STRING_LITERAL_DELIMITER, - TokenType::STRING_LITERAL_CONTENT, - TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, - TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, - TokenType::ESCAPE_SEQUENCE_UNICODE, - TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT + Rules::from( + Rule::STRING_LITERAL_DELIMITER, + Rule::STRING_LITERAL_CONTENT, + Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, + Rule::ESCAPE_SEQUENCE_HEXADECIMAL, + Rule::ESCAPE_SEQUENCE_UNICODE, + Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT ), - [[0, 0], [0, 0], TokenType::STRING_LITERAL_DELIMITER, '"'], - [[0, 1], [0, 30], TokenType::STRING_LITERAL_CONTENT, 'This is a string literal with '], - [[0, 31], [0, 32], TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'], - [[0, 33], [0, 41], TokenType::STRING_LITERAL_CONTENT, ' escapes '], - [[0, 42], [0, 45], TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, '\\xB1'], - [[0, 46], [0, 46], TokenType::STRING_LITERAL_CONTENT, ' '], - [[0, 47], [0, 52], TokenType::ESCAPE_SEQUENCE_UNICODE, '\\u5FA9'], - [[0, 53], [0, 53], TokenType::STRING_LITERAL_CONTENT, ' '], - [[0, 54], [0, 62], TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{1343E}'], - [[0, 63], [0, 63], TokenType::STRING_LITERAL_CONTENT, '!'], - [[0, 64], [0, 64], TokenType::STRING_LITERAL_DELIMITER, '"'] + [[0, 0], [0, 0], Rule::STRING_LITERAL_DELIMITER, '"'], + [[0, 1], [0, 30], Rule::STRING_LITERAL_CONTENT, 'This is a string literal with '], + [[0, 31], [0, 32], Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'], + [[0, 33], [0, 41], Rule::STRING_LITERAL_CONTENT, ' escapes '], + [[0, 42], [0, 45], Rule::ESCAPE_SEQUENCE_HEXADECIMAL, '\\xB1'], + [[0, 46], [0, 46], Rule::STRING_LITERAL_CONTENT, ' '], + [[0, 47], [0, 52], Rule::ESCAPE_SEQUENCE_UNICODE, '\\u5FA9'], + [[0, 53], [0, 53], Rule::STRING_LITERAL_CONTENT, ' '], + [[0, 54], [0, 62], Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{1343E}'], + [[0, 63], [0, 63], 
Rule::STRING_LITERAL_CONTENT, '!'], + [[0, 64], [0, 64], Rule::STRING_LITERAL_DELIMITER, '"'] ]; $source = << [ $source, - TokenTypes::from( - TokenType::TEMPLATE_LITERAL_DELIMITER, - TokenType::SPACE, - TokenType::TEMPLATE_LITERAL_CONTENT, - TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, - TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, - TokenType::ESCAPE_SEQUENCE_UNICODE, - TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, - TokenType::END_OF_LINE, - TokenType::BRACKET_CURLY_OPEN, - TokenType::BRACKET_CURLY_CLOSE + Rules::from( + Rule::TEMPLATE_LITERAL_DELIMITER, + Rule::SPACE, + Rule::TEMPLATE_LITERAL_CONTENT, + Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, + Rule::ESCAPE_SEQUENCE_HEXADECIMAL, + Rule::ESCAPE_SEQUENCE_UNICODE, + Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, + Rule::END_OF_LINE, + Rule::BRACKET_CURLY_OPEN, + Rule::BRACKET_CURLY_CLOSE ), - [[0, 0], [0, 2], TokenType::TEMPLATE_LITERAL_DELIMITER, '"""'], - [[0, 3], [0, 3], TokenType::END_OF_LINE, "\n"], - [[1, 0], [1, 3], TokenType::SPACE, ' '], - [[1, 4], [1, 43], TokenType::TEMPLATE_LITERAL_CONTENT, 'This is "template literal" content with '], - [[1, 44], [1, 45], TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'], - [[1, 46], [1, 46], TokenType::SPACE, ' '], - [[1, 47], [1, 54], TokenType::TEMPLATE_LITERAL_CONTENT, 'escapes '], - [[1, 55], [1, 58], TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, '\\xB1'], - [[1, 59], [1, 59], TokenType::SPACE, ' '], - [[1, 60], [1, 65], TokenType::ESCAPE_SEQUENCE_UNICODE, '\\u5FA9'], - [[1, 66], [1, 66], TokenType::SPACE, ' '], - [[1, 67], [1, 75], TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{1343E}'], - [[1, 76], [1, 76], TokenType::END_OF_LINE, "\n"], - [[2, 0], [2, 3], TokenType::SPACE, ' '], - [[2, 4], [2, 29], TokenType::TEMPLATE_LITERAL_CONTENT, 'and embedded expressions: '], - [[2, 30], [2, 30], TokenType::BRACKET_CURLY_OPEN, '{'], - [[2, 31], [2, 31], TokenType::BRACKET_CURLY_CLOSE, '}'], - [[2, 32], [2, 32], TokenType::SPACE, ' '], - [[2, 33], [2, 34], 
TokenType::TEMPLATE_LITERAL_CONTENT, ':)'], - [[2, 35], [2, 35], TokenType::END_OF_LINE, "\n"], - [[3, 0], [3, 3], TokenType::SPACE, ' '], - [[3, 4], [3, 6], TokenType::TEMPLATE_LITERAL_DELIMITER, '"""'], + [[0, 0], [0, 2], Rule::TEMPLATE_LITERAL_DELIMITER, '"""'], + [[0, 3], [0, 3], Rule::END_OF_LINE, "\n"], + [[1, 0], [1, 3], Rule::SPACE, ' '], + [[1, 4], [1, 43], Rule::TEMPLATE_LITERAL_CONTENT, 'This is "template literal" content with '], + [[1, 44], [1, 45], Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'], + [[1, 46], [1, 46], Rule::SPACE, ' '], + [[1, 47], [1, 54], Rule::TEMPLATE_LITERAL_CONTENT, 'escapes '], + [[1, 55], [1, 58], Rule::ESCAPE_SEQUENCE_HEXADECIMAL, '\\xB1'], + [[1, 59], [1, 59], Rule::SPACE, ' '], + [[1, 60], [1, 65], Rule::ESCAPE_SEQUENCE_UNICODE, '\\u5FA9'], + [[1, 66], [1, 66], Rule::SPACE, ' '], + [[1, 67], [1, 75], Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{1343E}'], + [[1, 76], [1, 76], Rule::END_OF_LINE, "\n"], + [[2, 0], [2, 3], Rule::SPACE, ' '], + [[2, 4], [2, 29], Rule::TEMPLATE_LITERAL_CONTENT, 'and embedded expressions: '], + [[2, 30], [2, 30], Rule::BRACKET_CURLY_OPEN, '{'], + [[2, 31], [2, 31], Rule::BRACKET_CURLY_CLOSE, '}'], + [[2, 32], [2, 32], Rule::SPACE, ' '], + [[2, 33], [2, 34], Rule::TEMPLATE_LITERAL_CONTENT, ':)'], + [[2, 35], [2, 35], Rule::END_OF_LINE, "\n"], + [[3, 0], [3, 3], Rule::SPACE, ' '], + [[3, 4], [3, 6], Rule::TEMPLATE_LITERAL_DELIMITER, '"""'], ]; $source = << [ $source, - TokenTypes::from( - TokenType::BRACKET_ANGLE_OPEN, - TokenType::WORD, - TokenType::SPACE, - TokenType::SYMBOL_EQUALS, - TokenType::STRING_LITERAL_DELIMITER, - TokenType::BRACKET_ANGLE_CLOSE, - TokenType::END_OF_LINE, - TokenType::SYMBOL_DASH, - TokenType::SYMBOL_SLASH_FORWARD, - TokenType::BRACKET_CURLY_OPEN, - TokenType::BRACKET_CURLY_CLOSE, - TokenType::SYMBOL_COLON + Rules::from( + Rule::BRACKET_ANGLE_OPEN, + Rule::WORD, + Rule::SPACE, + Rule::SYMBOL_EQUALS, + Rule::STRING_LITERAL_DELIMITER, + Rule::BRACKET_ANGLE_CLOSE, + 
Rule::END_OF_LINE, + Rule::SYMBOL_DASH, + Rule::SYMBOL_SLASH_FORWARD, + Rule::BRACKET_CURLY_OPEN, + Rule::BRACKET_CURLY_CLOSE, + Rule::SYMBOL_COLON ), - [[0, 0], [0, 0], TokenType::BRACKET_ANGLE_OPEN, '<'], - [[0, 1], [0, 1], TokenType::WORD, 'a'], - [[0, 2], [0, 2], TokenType::SPACE, ' '], - [[0, 3], [0, 6], TokenType::WORD, 'href'], - [[0, 7], [0, 7], TokenType::SYMBOL_EQUALS, '='], - [[0, 8], [0, 8], TokenType::STRING_LITERAL_DELIMITER, '"'], - [[0, 9], [0, 9], TokenType::STRING_LITERAL_DELIMITER, '"'], - [[0, 10], [0, 10], TokenType::BRACKET_ANGLE_CLOSE, '>'], - [[0, 11], [0, 11], TokenType::END_OF_LINE, "\n"], - [[1, 0], [1, 3], TokenType::SPACE, ' '], - [[1, 4], [1, 4], TokenType::BRACKET_ANGLE_OPEN, '<'], - [[1, 5], [1, 6], TokenType::WORD, 'my'], - [[1, 7], [1, 7], TokenType::SYMBOL_DASH, '-'], - [[1, 8], [1, 14], TokenType::WORD, 'element'], - [[1, 15], [1, 15], TokenType::SYMBOL_SLASH_FORWARD, '/'], - [[1, 16], [1, 16], TokenType::BRACKET_ANGLE_CLOSE, '>'], - [[1, 17], [1, 17], TokenType::END_OF_LINE, "\n"], - [[2, 0], [2, 3], TokenType::SPACE, ' '], - [[2, 4], [2, 4], TokenType::BRACKET_ANGLE_OPEN, '<'], - [[2, 5], [2, 7], TokenType::WORD, 'div'], - [[2, 8], [2, 8], TokenType::SPACE, ' '], - [[2, 9], [2, 13], TokenType::WORD, 'class'], - [[2, 14], [2, 14], TokenType::SYMBOL_EQUALS, '='], - [[2, 15], [2, 15], TokenType::BRACKET_CURLY_OPEN, '{'], - [[2, 16], [2, 16], TokenType::BRACKET_CURLY_CLOSE, '}'], - [[2, 17], [2, 17], TokenType::SPACE, ' '], - [[2, 18], [2, 20], TokenType::WORD, 'foo'], - [[2, 21], [2, 21], TokenType::SYMBOL_COLON, ':'], - [[2, 22], [2, 24], TokenType::WORD, 'bar'], - [[2, 25], [2, 25], TokenType::BRACKET_ANGLE_CLOSE, '>'], - [[2, 26], [2, 26], TokenType::END_OF_LINE, "\n"], - [[3, 0], [3, 3], TokenType::SPACE, ' '], - [[3, 4], [3, 4], TokenType::BRACKET_ANGLE_OPEN, '<'], - [[3, 5], [3, 5], TokenType::SYMBOL_SLASH_FORWARD, '/'], - [[3, 6], [3, 8], TokenType::WORD, 'div'], - [[3, 9], [3, 9], TokenType::BRACKET_ANGLE_CLOSE, '>'], - 
[[3, 10], [3, 10], TokenType::END_OF_LINE, "\n"], - [[4, 0], [4, 0], TokenType::BRACKET_ANGLE_OPEN, '<'], - [[4, 1], [4, 1], TokenType::SYMBOL_SLASH_FORWARD, '/'], - [[4, 2], [4, 2], TokenType::WORD, 'a'], - [[4, 3], [4, 3], TokenType::BRACKET_ANGLE_CLOSE, '>'], + [[0, 0], [0, 0], Rule::BRACKET_ANGLE_OPEN, '<'], + [[0, 1], [0, 1], Rule::WORD, 'a'], + [[0, 2], [0, 2], Rule::SPACE, ' '], + [[0, 3], [0, 6], Rule::WORD, 'href'], + [[0, 7], [0, 7], Rule::SYMBOL_EQUALS, '='], + [[0, 8], [0, 8], Rule::STRING_LITERAL_DELIMITER, '"'], + [[0, 9], [0, 9], Rule::STRING_LITERAL_DELIMITER, '"'], + [[0, 10], [0, 10], Rule::BRACKET_ANGLE_CLOSE, '>'], + [[0, 11], [0, 11], Rule::END_OF_LINE, "\n"], + [[1, 0], [1, 3], Rule::SPACE, ' '], + [[1, 4], [1, 4], Rule::BRACKET_ANGLE_OPEN, '<'], + [[1, 5], [1, 6], Rule::WORD, 'my'], + [[1, 7], [1, 7], Rule::SYMBOL_DASH, '-'], + [[1, 8], [1, 14], Rule::WORD, 'element'], + [[1, 15], [1, 15], Rule::SYMBOL_SLASH_FORWARD, '/'], + [[1, 16], [1, 16], Rule::BRACKET_ANGLE_CLOSE, '>'], + [[1, 17], [1, 17], Rule::END_OF_LINE, "\n"], + [[2, 0], [2, 3], Rule::SPACE, ' '], + [[2, 4], [2, 4], Rule::BRACKET_ANGLE_OPEN, '<'], + [[2, 5], [2, 7], Rule::WORD, 'div'], + [[2, 8], [2, 8], Rule::SPACE, ' '], + [[2, 9], [2, 13], Rule::WORD, 'class'], + [[2, 14], [2, 14], Rule::SYMBOL_EQUALS, '='], + [[2, 15], [2, 15], Rule::BRACKET_CURLY_OPEN, '{'], + [[2, 16], [2, 16], Rule::BRACKET_CURLY_CLOSE, '}'], + [[2, 17], [2, 17], Rule::SPACE, ' '], + [[2, 18], [2, 20], Rule::WORD, 'foo'], + [[2, 21], [2, 21], Rule::SYMBOL_COLON, ':'], + [[2, 22], [2, 24], Rule::WORD, 'bar'], + [[2, 25], [2, 25], Rule::BRACKET_ANGLE_CLOSE, '>'], + [[2, 26], [2, 26], Rule::END_OF_LINE, "\n"], + [[3, 0], [3, 3], Rule::SPACE, ' '], + [[3, 4], [3, 4], Rule::BRACKET_ANGLE_OPEN, '<'], + [[3, 5], [3, 5], Rule::SYMBOL_SLASH_FORWARD, '/'], + [[3, 6], [3, 8], Rule::WORD, 'div'], + [[3, 9], [3, 9], Rule::BRACKET_ANGLE_CLOSE, '>'], + [[3, 10], [3, 10], Rule::END_OF_LINE, "\n"], + [[4, 0], [4, 0], 
Rule::BRACKET_ANGLE_OPEN, '<'], + [[4, 1], [4, 1], Rule::SYMBOL_SLASH_FORWARD, '/'], + [[4, 2], [4, 2], Rule::WORD, 'a'], + [[4, 3], [4, 3], Rule::BRACKET_ANGLE_CLOSE, '>'], ]; $source = << [ $source, - TokenTypes::from( - TokenType::TEXT, - TokenType::BRACKET_CURLY_OPEN, - TokenType::BRACKET_CURLY_CLOSE, - TokenType::SPACE, - TokenType::END_OF_LINE, - TokenType::BRACKET_ANGLE_OPEN, - TokenType::BRACKET_ANGLE_CLOSE + Rules::from( + Rule::TEXT, + Rule::BRACKET_CURLY_OPEN, + Rule::BRACKET_CURLY_CLOSE, + Rule::SPACE, + Rule::END_OF_LINE, + Rule::BRACKET_ANGLE_OPEN, + Rule::BRACKET_ANGLE_CLOSE ), - [[0, 0], [0, 30], TokenType::TEXT, 'ThisIsSomeText-with-expressions'], - [[0, 31], [0, 31], TokenType::BRACKET_CURLY_OPEN, '{'], - [[0, 32], [0, 32], TokenType::BRACKET_CURLY_CLOSE, '}'], - [[0, 33], [0, 33], TokenType::END_OF_LINE, "\n"], - [[1, 0], [1, 11], TokenType::TEXT, 'line-breaks,'], - [[1, 12], [1, 14], TokenType::SPACE, ' '], - [[1, 15], [1, 20], TokenType::TEXT, 'spaces'], - [[1, 21], [1, 23], TokenType::SPACE, ' '], - [[1, 24], [1, 30], TokenType::TEXT, 'andTags'], - [[1, 31], [1, 31], TokenType::BRACKET_ANGLE_OPEN, '<'], - [[1, 32], [1, 32], TokenType::BRACKET_ANGLE_CLOSE, '>'], - [[1, 33], [1, 39], TokenType::TEXT, 'inside.'], + [[0, 0], [0, 30], Rule::TEXT, 'ThisIsSomeText-with-expressions'], + [[0, 31], [0, 31], Rule::BRACKET_CURLY_OPEN, '{'], + [[0, 32], [0, 32], Rule::BRACKET_CURLY_CLOSE, '}'], + [[0, 33], [0, 33], Rule::END_OF_LINE, "\n"], + [[1, 0], [1, 11], Rule::TEXT, 'line-breaks,'], + [[1, 12], [1, 14], Rule::SPACE, ' '], + [[1, 15], [1, 20], Rule::TEXT, 'spaces'], + [[1, 21], [1, 23], Rule::SPACE, ' '], + [[1, 24], [1, 30], Rule::TEXT, 'andTags'], + [[1, 31], [1, 31], Rule::BRACKET_ANGLE_OPEN, '<'], + [[1, 32], [1, 32], Rule::BRACKET_ANGLE_CLOSE, '>'], + [[1, 33], [1, 39], Rule::TEXT, 'inside.'], ]; } @@ -502,12 +502,12 @@ public static function multipleTokensExamples(): iterable * @dataProvider multipleTokensExamples * @test * @param string $source 
- * @param array{array{int,int},array{int,int},TokenType,string} ...$expectedLexerStates + * @param array{array{int,int},array{int,int},Rule,string} ...$expectedLexerStates * @return void */ - public function testReadOneOfWithMultipleTokenTypes( + public function testReadOneOfWithMultipleRules( string $source, - TokenTypes $tokenTypes, + Rules $tokenTypes, array ...$expectedLexerStates ): void { $this->lexer = new Lexer($source); @@ -531,7 +531,7 @@ public function testReadOneOfWithMultipleTokenTypes( public static function failingSingleTokenExamples(): iterable { $example = static function ( - TokenType $type, + Rule $type, string $source, string $unexpectedCharacterSequence ) { @@ -546,132 +546,132 @@ public static function failingSingleTokenExamples(): iterable ]; }; - yield from $example(TokenType::COMMENT, 'Anything that does not start with "#"', 'A'); - - yield from $example(TokenType::KEYWORD_FROM, 'false', 'fa'); - yield from $example(TokenType::KEYWORD_IMPORT, 'implausible', 'impl'); - yield from $example(TokenType::KEYWORD_EXPORT, 'ex-machina', 'ex-'); - yield from $example(TokenType::KEYWORD_ENUM, 'enough', 'eno'); - yield from $example(TokenType::KEYWORD_STRUCT, 'strict', 'stri'); - yield from $example(TokenType::KEYWORD_COMPONENT, 'composition', 'compos'); - yield from $example(TokenType::KEYWORD_MATCH, 'matter', 'matt'); - yield from $example(TokenType::KEYWORD_DEFAULT, 'definition', 'defi'); - yield from $example(TokenType::KEYWORD_RETURN, 'retroactive', 'retr'); - yield from $example(TokenType::KEYWORD_TRUE, 'truth', 'trut'); - yield from $example(TokenType::KEYWORD_FALSE, 'falsify', 'falsi'); - yield from $example(TokenType::KEYWORD_NULL, 'nuclear', 'nuc'); - - yield from $example(TokenType::STRING_LITERAL_DELIMITER, '\'', '\''); - yield from $example(TokenType::STRING_LITERAL_CONTENT, '"', '"'); - yield from $example(TokenType::STRING_LITERAL_CONTENT, '\\', '\\'); - - yield from $example(TokenType::INTEGER_BINARY, '001001', '00'); - yield from 
$example(TokenType::INTEGER_BINARY, '0b21', '0b2'); - yield from $example(TokenType::INTEGER_OCTAL, '0p12345670', '0p'); - yield from $example(TokenType::INTEGER_OCTAL, '0o84', '0o8'); - yield from $example(TokenType::INTEGER_DECIMAL, ' ', ' '); - yield from $example(TokenType::INTEGER_DECIMAL, 'foo', 'f'); - yield from $example(TokenType::INTEGER_HEXADECIMAL, '0xG', '0xG'); - yield from $example(TokenType::INTEGER_HEXADECIMAL, '0yFFAA00', '0y'); - - yield from $example(TokenType::TEMPLATE_LITERAL_DELIMITER, '`', '`'); - yield from $example(TokenType::TEMPLATE_LITERAL_CONTENT, '{', '{'); - yield from $example(TokenType::TEMPLATE_LITERAL_CONTENT, '}', '}'); - yield from $example(TokenType::TEMPLATE_LITERAL_CONTENT, "\n", "\n"); - yield from $example(TokenType::TEMPLATE_LITERAL_CONTENT, '\\', '\\'); - - yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\o', '\\o'); - yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\ü', '\\ü'); - yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\£', '\\£'); - yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\И', '\\И'); - yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\ह', '\\ह'); - yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\€', '\\€'); - yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\𐍈', '\\𐍈'); - yield from $example(TokenType::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\8', '\\8'); - yield from $example(TokenType::ESCAPE_SEQUENCE_HEXADECIMAL, '\\x9G', '\\x9G'); - yield from $example(TokenType::ESCAPE_SEQUENCE_UNICODE, '\\u00AY', '\\u00AY'); - yield from $example(TokenType::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{2F8O4}', '\\u{2F8O'); - - yield from $example(TokenType::BRACKET_CURLY_OPEN, 'a', 'a'); - yield from $example(TokenType::BRACKET_CURLY_OPEN, '😱', '😱'); - yield from $example(TokenType::BRACKET_CURLY_CLOSE, 'b', 'b'); - yield from $example(TokenType::BRACKET_CURLY_CLOSE, '🖖', '🖖'); - yield from 
$example(TokenType::BRACKET_ROUND_OPEN, 'c', 'c'); - yield from $example(TokenType::BRACKET_ROUND_OPEN, '🌈', '🌈'); - yield from $example(TokenType::BRACKET_ROUND_CLOSE, 'd', 'd'); - yield from $example(TokenType::BRACKET_ROUND_CLOSE, '⚓', '⚓'); - yield from $example(TokenType::BRACKET_SQUARE_OPEN, 'e', 'e'); - yield from $example(TokenType::BRACKET_SQUARE_OPEN, '☘', '☘'); - yield from $example(TokenType::BRACKET_SQUARE_CLOSE, 'f', 'f'); - yield from $example(TokenType::BRACKET_SQUARE_CLOSE, '🎷', '🎷'); - yield from $example(TokenType::BRACKET_ANGLE_OPEN, 'g', 'g'); - yield from $example(TokenType::BRACKET_ANGLE_OPEN, '🐒', '🐒'); - yield from $example(TokenType::BRACKET_ANGLE_CLOSE, 'h', 'h'); - yield from $example(TokenType::BRACKET_ANGLE_CLOSE, '💡', '💡'); - - yield from $example(TokenType::SYMBOL_PERIOD, 'i', 'i'); - yield from $example(TokenType::SYMBOL_PERIOD, '?.', '?'); - yield from $example(TokenType::SYMBOL_COLON, '-', '-'); - yield from $example(TokenType::SYMBOL_COLON, '➗', '➗'); - yield from $example(TokenType::SYMBOL_QUESTIONMARK, '❓', '❓'); - yield from $example(TokenType::SYMBOL_EXCLAMATIONMARK, '❗', '❗'); - yield from $example(TokenType::SYMBOL_COMMA, '.', '.'); - yield from $example(TokenType::SYMBOL_DASH, '➖', '➖'); - yield from $example(TokenType::SYMBOL_EQUALS, '<=', '<'); - yield from $example(TokenType::SYMBOL_SLASH_FORWARD, '\\', '\\'); - yield from $example(TokenType::SYMBOL_PIPE, '🌵', '🌵'); - yield from $example(TokenType::SYMBOL_BOOLEAN_AND, '§§', '§'); - yield from $example(TokenType::SYMBOL_BOOLEAN_OR, '//', '/'); - yield from $example(TokenType::SYMBOL_STRICT_EQUALS, '!==', '!'); - yield from $example(TokenType::SYMBOL_NOT_EQUALS, '===', '='); - yield from $example(TokenType::SYMBOL_GREATER_THAN_OR_EQUAL, '=>', '='); - yield from $example(TokenType::SYMBOL_LESS_THAN_OR_EQUAL, '=<', '='); - yield from $example(TokenType::SYMBOL_ARROW_SINGLE, '=>', '='); - yield from $example(TokenType::SYMBOL_OPTCHAIN, '??', '??'); - yield from 
$example(TokenType::SYMBOL_NULLISH_COALESCE, '?.', '?.'); - - yield from $example(TokenType::WORD, '!NotAValidWord', '!'); - yield from $example(TokenType::WORD, '?N0t4V4l1dW0rd', '?'); - yield from $example(TokenType::WORD, '...1245NotAValidWord', '.'); - - yield from $example(TokenType::TEXT, '<', '<'); - yield from $example(TokenType::TEXT, '>', '>'); - yield from $example(TokenType::TEXT, '{', '{'); - yield from $example(TokenType::TEXT, '}', '}'); - - yield from $example(TokenType::SPACE, '{', '{'); - yield from $example(TokenType::SPACE, '}', '}'); - yield from $example(TokenType::SPACE, '💡', '💡'); - yield from $example(TokenType::SPACE, 'Anything but space', 'A'); - - yield from $example(TokenType::END_OF_LINE, '{', '{'); - yield from $example(TokenType::END_OF_LINE, '}', '}'); - yield from $example(TokenType::END_OF_LINE, '💡', '💡'); - yield from $example(TokenType::END_OF_LINE, 'Anything but \\n', 'A'); + yield from $example(Rule::COMMENT, 'Anything that does not start with "#"', 'A'); + + yield from $example(Rule::KEYWORD_FROM, 'false', 'fa'); + yield from $example(Rule::KEYWORD_IMPORT, 'implausible', 'impl'); + yield from $example(Rule::KEYWORD_EXPORT, 'ex-machina', 'ex-'); + yield from $example(Rule::KEYWORD_ENUM, 'enough', 'eno'); + yield from $example(Rule::KEYWORD_STRUCT, 'strict', 'stri'); + yield from $example(Rule::KEYWORD_COMPONENT, 'composition', 'compos'); + yield from $example(Rule::KEYWORD_MATCH, 'matter', 'matt'); + yield from $example(Rule::KEYWORD_DEFAULT, 'definition', 'defi'); + yield from $example(Rule::KEYWORD_RETURN, 'retroactive', 'retr'); + yield from $example(Rule::KEYWORD_TRUE, 'truth', 'trut'); + yield from $example(Rule::KEYWORD_FALSE, 'falsify', 'falsi'); + yield from $example(Rule::KEYWORD_NULL, 'nuclear', 'nuc'); + + yield from $example(Rule::STRING_LITERAL_DELIMITER, '\'', '\''); + yield from $example(Rule::STRING_LITERAL_CONTENT, '"', '"'); + yield from $example(Rule::STRING_LITERAL_CONTENT, '\\', '\\'); + + yield from 
$example(Rule::INTEGER_BINARY, '001001', '00'); + yield from $example(Rule::INTEGER_BINARY, '0b21', '0b2'); + yield from $example(Rule::INTEGER_OCTAL, '0p12345670', '0p'); + yield from $example(Rule::INTEGER_OCTAL, '0o84', '0o8'); + yield from $example(Rule::INTEGER_DECIMAL, ' ', ' '); + yield from $example(Rule::INTEGER_DECIMAL, 'foo', 'f'); + yield from $example(Rule::INTEGER_HEXADECIMAL, '0xG', '0xG'); + yield from $example(Rule::INTEGER_HEXADECIMAL, '0yFFAA00', '0y'); + + yield from $example(Rule::TEMPLATE_LITERAL_DELIMITER, '`', '`'); + yield from $example(Rule::TEMPLATE_LITERAL_CONTENT, '{', '{'); + yield from $example(Rule::TEMPLATE_LITERAL_CONTENT, '}', '}'); + yield from $example(Rule::TEMPLATE_LITERAL_CONTENT, "\n", "\n"); + yield from $example(Rule::TEMPLATE_LITERAL_CONTENT, '\\', '\\'); + + yield from $example(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\o', '\\o'); + yield from $example(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\ü', '\\ü'); + yield from $example(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\£', '\\£'); + yield from $example(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\И', '\\И'); + yield from $example(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\ह', '\\ह'); + yield from $example(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\€', '\\€'); + yield from $example(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\𐍈', '\\𐍈'); + yield from $example(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\8', '\\8'); + yield from $example(Rule::ESCAPE_SEQUENCE_HEXADECIMAL, '\\x9G', '\\x9G'); + yield from $example(Rule::ESCAPE_SEQUENCE_UNICODE, '\\u00AY', '\\u00AY'); + yield from $example(Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT, '\\u{2F8O4}', '\\u{2F8O'); + + yield from $example(Rule::BRACKET_CURLY_OPEN, 'a', 'a'); + yield from $example(Rule::BRACKET_CURLY_OPEN, '😱', '😱'); + yield from $example(Rule::BRACKET_CURLY_CLOSE, 'b', 'b'); + yield from $example(Rule::BRACKET_CURLY_CLOSE, '🖖', '🖖'); + yield from $example(Rule::BRACKET_ROUND_OPEN, 'c', 'c'); + yield from 
$example(Rule::BRACKET_ROUND_OPEN, '🌈', '🌈'); + yield from $example(Rule::BRACKET_ROUND_CLOSE, 'd', 'd'); + yield from $example(Rule::BRACKET_ROUND_CLOSE, '⚓', '⚓'); + yield from $example(Rule::BRACKET_SQUARE_OPEN, 'e', 'e'); + yield from $example(Rule::BRACKET_SQUARE_OPEN, '☘', '☘'); + yield from $example(Rule::BRACKET_SQUARE_CLOSE, 'f', 'f'); + yield from $example(Rule::BRACKET_SQUARE_CLOSE, '🎷', '🎷'); + yield from $example(Rule::BRACKET_ANGLE_OPEN, 'g', 'g'); + yield from $example(Rule::BRACKET_ANGLE_OPEN, '🐒', '🐒'); + yield from $example(Rule::BRACKET_ANGLE_CLOSE, 'h', 'h'); + yield from $example(Rule::BRACKET_ANGLE_CLOSE, '💡', '💡'); + + yield from $example(Rule::SYMBOL_PERIOD, 'i', 'i'); + yield from $example(Rule::SYMBOL_PERIOD, '?.', '?'); + yield from $example(Rule::SYMBOL_COLON, '-', '-'); + yield from $example(Rule::SYMBOL_COLON, '➗', '➗'); + yield from $example(Rule::SYMBOL_QUESTIONMARK, '❓', '❓'); + yield from $example(Rule::SYMBOL_EXCLAMATIONMARK, '❗', '❗'); + yield from $example(Rule::SYMBOL_COMMA, '.', '.'); + yield from $example(Rule::SYMBOL_DASH, '➖', '➖'); + yield from $example(Rule::SYMBOL_EQUALS, '<=', '<'); + yield from $example(Rule::SYMBOL_SLASH_FORWARD, '\\', '\\'); + yield from $example(Rule::SYMBOL_PIPE, '🌵', '🌵'); + yield from $example(Rule::SYMBOL_BOOLEAN_AND, '§§', '§'); + yield from $example(Rule::SYMBOL_BOOLEAN_OR, '//', '/'); + yield from $example(Rule::SYMBOL_STRICT_EQUALS, '!==', '!'); + yield from $example(Rule::SYMBOL_NOT_EQUALS, '===', '='); + yield from $example(Rule::SYMBOL_GREATER_THAN_OR_EQUAL, '=>', '='); + yield from $example(Rule::SYMBOL_LESS_THAN_OR_EQUAL, '=<', '='); + yield from $example(Rule::SYMBOL_ARROW_SINGLE, '=>', '='); + yield from $example(Rule::SYMBOL_OPTCHAIN, '??', '??'); + yield from $example(Rule::SYMBOL_NULLISH_COALESCE, '?.', '?.'); + + yield from $example(Rule::WORD, '!NotAValidWord', '!'); + yield from $example(Rule::WORD, '?N0t4V4l1dW0rd', '?'); + yield from $example(Rule::WORD, 
'...1245NotAValidWord', '.'); + + yield from $example(Rule::TEXT, '<', '<'); + yield from $example(Rule::TEXT, '>', '>'); + yield from $example(Rule::TEXT, '{', '{'); + yield from $example(Rule::TEXT, '}', '}'); + + yield from $example(Rule::SPACE, '{', '{'); + yield from $example(Rule::SPACE, '}', '}'); + yield from $example(Rule::SPACE, '💡', '💡'); + yield from $example(Rule::SPACE, 'Anything but space', 'A'); + + yield from $example(Rule::END_OF_LINE, '{', '{'); + yield from $example(Rule::END_OF_LINE, '}', '}'); + yield from $example(Rule::END_OF_LINE, '💡', '💡'); + yield from $example(Rule::END_OF_LINE, 'Anything but \\n', 'A'); } /** * @dataProvider failingSingleTokenExamples * @test * @param string $source - * @param TokenType $expectedTokenType + * @param Rule $expectedRule * @param Range $affectedRangeInSource * @param string $actualTokenValue * @return void */ - public function throwsIfCharacterSequenceDoesNotMatchSingleTokenType( + public function throwsIfCharacterSequenceDoesNotMatchSingleRule( string $source, - TokenType $expectedTokenType, + Rule $expectedRule, Range $affectedRangeInSource, string $actualTokenValue ): void { $this->assertThrowsLexerException( - function () use ($source, $expectedTokenType) { + function () use ($source, $expectedRule) { $this->lexer = new Lexer($source); - $this->lexer->read($expectedTokenType); + $this->lexer->read($expectedRule); }, LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: TokenTypes::from($expectedTokenType), + expectedRules: Rules::from($expectedRule), affectedRangeInSource: $affectedRangeInSource, actualCharacterSequence: $actualTokenValue ) @@ -685,10 +685,10 @@ public static function failingMultipleTokensExamples(): iterable { yield ($source = "# This is a comment\nThis is not a comment") => [ $source, - $tokenTypes = TokenTypes::from(TokenType::COMMENT, TokenType::END_OF_LINE), + $tokenTypes = Rules::from(Rule::COMMENT, Rule::END_OF_LINE), 3, 
LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: $tokenTypes, + expectedRules: $tokenTypes, affectedRangeInSource: Range::from( Position::from(1, 0), Position::from(1, 0) @@ -702,14 +702,14 @@ public static function failingMultipleTokensExamples(): iterable * @dataProvider failingMultipleTokensExamples * @test * @param string $source - * @param TokenTypes $tokenTypes + * @param Rules $tokenTypes * @param integer $numberOfReadOperations * @param LexerException $expectedLexerException * @return void */ - public function throwsIfCharacterSequenceDoesNotMatchMultipleTokenTypes( + public function throwsIfCharacterSequenceDoesNotMatchMultipleRules( string $source, - TokenTypes $tokenTypes, + Rules $tokenTypes, int $numberOfReadOperations, LexerException $expectedLexerException ): void { @@ -728,15 +728,15 @@ function () use ($source, $tokenTypes, $numberOfReadOperations) { /** * @test */ - public function throwsIfSourceEndsUnexpectedlyWhileReadingASingleTokenType(): void + public function throwsIfSourceEndsUnexpectedlyWhileReadingASingleRule(): void { $this->assertThrowsLexerException( function () { $this->lexer = new Lexer(''); - $this->lexer->read(TokenType::KEYWORD_NULL); + $this->lexer->read(Rule::KEYWORD_NULL); }, LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: TokenTypes::from(TokenType::KEYWORD_NULL), + expectedRules: Rules::from(Rule::KEYWORD_NULL), affectedRangeInSource: Range::from( Position::from(0, 0), Position::from(0, 0) @@ -747,11 +747,11 @@ function () { $this->assertThrowsLexerException( function () { $lexer = new Lexer('null'); - $lexer->read(TokenType::KEYWORD_NULL); - $lexer->read(TokenType::KEYWORD_NULL); + $lexer->read(Rule::KEYWORD_NULL); + $lexer->read(Rule::KEYWORD_NULL); }, LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: TokenTypes::from(TokenType::KEYWORD_NULL), + expectedRules: Rules::from(Rule::KEYWORD_NULL), affectedRangeInSource: Range::from( Position::from(0, 0), 
Position::from(0, 4) @@ -763,18 +763,18 @@ function () { /** * @return iterable */ - public static function multipleTokenTypeUnexpectedEndOfSourceExamples(): iterable + public static function multipleRuleUnexpectedEndOfSourceExamples(): iterable { yield ($source = '') => [ $source, - $tokenTypes = TokenTypes::from( - TokenType::KEYWORD_RETURN, - TokenType::KEYWORD_NULL, - TokenType::SPACE + $tokenTypes = Rules::from( + Rule::KEYWORD_RETURN, + Rule::KEYWORD_NULL, + Rule::SPACE ), 1, LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: $tokenTypes, + expectedRules: $tokenTypes, affectedRangeInSource: Range::from( Position::from(0, 0), Position::from(0, 0) @@ -784,14 +784,14 @@ public static function multipleTokenTypeUnexpectedEndOfSourceExamples(): iterabl yield ($source = 'return') => [ $source, - $tokenTypes = TokenTypes::from( - TokenType::KEYWORD_RETURN, - TokenType::KEYWORD_NULL, - TokenType::SPACE + $tokenTypes = Rules::from( + Rule::KEYWORD_RETURN, + Rule::KEYWORD_NULL, + Rule::SPACE ), 2, LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: $tokenTypes, + expectedRules: $tokenTypes, affectedRangeInSource: Range::from( Position::from(0, 6), Position::from(0, 6) @@ -801,14 +801,14 @@ public static function multipleTokenTypeUnexpectedEndOfSourceExamples(): iterabl yield ($source = 'return ') => [ $source, - $tokenTypes = TokenTypes::from( - TokenType::KEYWORD_RETURN, - TokenType::KEYWORD_NULL, - TokenType::SPACE + $tokenTypes = Rules::from( + Rule::KEYWORD_RETURN, + Rule::KEYWORD_NULL, + Rule::SPACE ), 3, LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: $tokenTypes, + expectedRules: $tokenTypes, affectedRangeInSource: Range::from( Position::from(0, 7), Position::from(0, 7) @@ -818,17 +818,17 @@ public static function multipleTokenTypeUnexpectedEndOfSourceExamples(): iterabl } /** - * @dataProvider multipleTokenTypeUnexpectedEndOfSourceExamples + * @dataProvider multipleRuleUnexpectedEndOfSourceExamples * 
@test * @param string $source - * @param TokenTypes $tokenTypes + * @param Rules $tokenTypes * @param integer $numberOfReadOperations * @param LexerException $expectedLexerException * @return void */ - public function throwsIfSourceEndsUnexpectedlyWhileReadingMultipleTokenTypes( + public function throwsIfSourceEndsUnexpectedlyWhileReadingMultipleRules( string $source, - TokenTypes $tokenTypes, + Rules $tokenTypes, int $numberOfReadOperations, LexerException $expectedLexerException ): void { @@ -852,14 +852,14 @@ public function skipsSpace(): void // Single $this->lexer = new Lexer('return ' . "\t\n\t" . ' 42'); - $this->lexer->read(TokenType::KEYWORD_RETURN); + $this->lexer->read(Rule::KEYWORD_RETURN); $this->lexer->skipSpace(); - $this->lexer->read(TokenType::INTEGER_DECIMAL); + $this->lexer->read(Rule::INTEGER_DECIMAL); $this->assertLexerState( startPosition: Position::from(1, 4), endPosition: Position::from(1, 5), - tokenTypeUnderCursor: TokenType::INTEGER_DECIMAL, + tokenTypeUnderCursor: Rule::INTEGER_DECIMAL, buffer: '42', isEnd: true ); @@ -867,14 +867,14 @@ public function skipsSpace(): void // Multiple $this->lexer = new Lexer('return ' . "\t\n\t" . 
' 42'); - $this->lexer->readOneOf(TokenTypes::from(TokenType::KEYWORD_RETURN, TokenType::INTEGER_DECIMAL)); + $this->lexer->readOneOf(Rules::from(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL)); $this->lexer->skipSpace(); - $this->lexer->readOneOf(TokenTypes::from(TokenType::KEYWORD_RETURN, TokenType::INTEGER_DECIMAL)); + $this->lexer->readOneOf(Rules::from(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL)); $this->assertLexerState( startPosition: Position::from(1, 4), endPosition: Position::from(1, 5), - tokenTypeUnderCursor: TokenType::INTEGER_DECIMAL, + tokenTypeUnderCursor: Rule::INTEGER_DECIMAL, buffer: '42', isEnd: true ); @@ -898,16 +898,16 @@ public function skipsSpaceAndComments(): void // Single $this->lexer = new Lexer($source); - $this->lexer->read(TokenType::KEYWORD_IMPORT); + $this->lexer->read(Rule::KEYWORD_IMPORT); $this->lexer->skipSpaceAndComments(); - $this->lexer->read(TokenType::KEYWORD_EXPORT); + $this->lexer->read(Rule::KEYWORD_EXPORT); $this->lexer->skipSpaceAndComments(); - $this->lexer->read(TokenType::KEYWORD_COMPONENT); + $this->lexer->read(Rule::KEYWORD_COMPONENT); $this->assertLexerState( startPosition: Position::from(6, 4), endPosition: Position::from(6, 12), - tokenTypeUnderCursor: TokenType::KEYWORD_COMPONENT, + tokenTypeUnderCursor: Rule::KEYWORD_COMPONENT, buffer: 'component', isEnd: true ); @@ -915,33 +915,33 @@ public function skipsSpaceAndComments(): void // Multiple $this->lexer = new Lexer($source); $this->lexer->readOneOf( - TokenTypes::from( - TokenType::KEYWORD_IMPORT, - TokenType::KEYWORD_EXPORT, - TokenType::KEYWORD_COMPONENT + Rules::from( + Rule::KEYWORD_IMPORT, + Rule::KEYWORD_EXPORT, + Rule::KEYWORD_COMPONENT ) ); $this->lexer->skipSpaceAndComments(); $this->lexer->readOneOf( - TokenTypes::from( - TokenType::KEYWORD_IMPORT, - TokenType::KEYWORD_EXPORT, - TokenType::KEYWORD_COMPONENT + Rules::from( + Rule::KEYWORD_IMPORT, + Rule::KEYWORD_EXPORT, + Rule::KEYWORD_COMPONENT ) ); $this->lexer->skipSpaceAndComments(); 
$this->lexer->readOneOf( - TokenTypes::from( - TokenType::KEYWORD_IMPORT, - TokenType::KEYWORD_EXPORT, - TokenType::KEYWORD_COMPONENT + Rules::from( + Rule::KEYWORD_IMPORT, + Rule::KEYWORD_EXPORT, + Rule::KEYWORD_COMPONENT ) ); $this->assertLexerState( startPosition: Position::from(6, 4), endPosition: Position::from(6, 12), - tokenTypeUnderCursor: TokenType::KEYWORD_COMPONENT, + tokenTypeUnderCursor: Rule::KEYWORD_COMPONENT, buffer: 'component', isEnd: true ); diff --git a/test/Unit/Language/Parser/Export/ExportParserTest.php b/test/Unit/Language/Parser/Export/ExportParserTest.php index fd75d3b..2ee37f0 100644 --- a/test/Unit/Language/Parser/Export/ExportParserTest.php +++ b/test/Unit/Language/Parser/Export/ExportParserTest.php @@ -49,8 +49,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\Export\ExportCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\Export\ExportParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -190,10 +190,10 @@ function () { }, ExportCouldNotBeParsed::becauseOfLexerException( cause: LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: TokenTypes::from( - TokenType::KEYWORD_COMPONENT, - TokenType::KEYWORD_ENUM, - TokenType::KEYWORD_STRUCT + expectedRules: Rules::from( + Rule::KEYWORD_COMPONENT, + Rule::KEYWORD_ENUM, + Rule::KEYWORD_STRUCT ), affectedRangeInSource: $this->range([0, 7], [0, 7]), actualCharacterSequence: 'n' diff --git 
a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php index 6a6f6f6..147d7ba 100644 --- a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php +++ b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php @@ -26,8 +26,8 @@ use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType; -use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -128,11 +128,11 @@ function () { }, IntegerLiteralCouldNotBeParsed::becauseOfLexerException( cause: LexerException::becauseOfUnexpectedEndOfSource( - expectedTokenTypes: TokenTypes::from( - TokenType::INTEGER_HEXADECIMAL, - TokenType::INTEGER_DECIMAL, - TokenType::INTEGER_OCTAL, - TokenType::INTEGER_BINARY + expectedRules: Rules::from( + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + Rule::INTEGER_OCTAL, + Rule::INTEGER_BINARY ), affectedRangeInSource: $this->range([0, 0], [0, 0]) ) @@ -154,11 +154,11 @@ function () { }, IntegerLiteralCouldNotBeParsed::becauseOfLexerException( cause: LexerException::becauseOfUnexpectedCharacterSequence( - expectedTokenTypes: TokenTypes::from( - TokenType::INTEGER_HEXADECIMAL, - TokenType::INTEGER_DECIMAL, - TokenType::INTEGER_OCTAL, - TokenType::INTEGER_BINARY + expectedRules: Rules::from( + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + 
Rule::INTEGER_OCTAL, + Rule::INTEGER_BINARY ), affectedRangeInSource: $this->range([0, 0], [0, 0]), actualCharacterSequence: 'f' From c50ac6a3b15d8fc8291aa57c81a0c57425f4cb0f Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Thu, 17 Aug 2023 16:17:25 +0200 Subject: [PATCH 08/19] TASK: Split Scanner from Lexer --- src/Language/Lexer/Buffer/Buffer.php | 112 ++++ .../Lexer/CharacterStream/CharacterStream.php | 43 +- .../CharacterStreamSnapshot.php | 1 - src/Language/Lexer/Lexer.php | 311 ++++------- src/Language/Lexer/LexerException.php | 25 +- src/Language/Lexer/Rule/Rule.php | 9 +- src/Language/Lexer/Rule/RuleInterface.php | 30 + src/Language/Lexer/Scanner/Scanner.php | 155 ++++++ .../Lexer/Scanner/ScannerException.php | 58 ++ .../Lexer/Scanner/ScannerInterface.php | 37 ++ src/Language/Lexer/Scanner/ScannerState.php | 41 ++ .../Parser/Expression/ExpressionParser.php | 13 +- src/Language/Parser/Module/ModuleParser.php | 1 - .../TypeReference/TypeReferenceParser.php | 7 +- .../Unit/Language/Lexer/Buffer/BufferTest.php | 255 +++++++++ .../Lexer/Matcher/MatcherFixtures.php | 93 ++++ .../Unit/Language/Lexer/Rule/RuleFixtures.php | 44 ++ .../Language/Lexer/Scanner/ScannerTest.php | 525 ++++++++++++++++++ .../Parser/Module/ModuleParserTest.php | 9 +- 19 files changed, 1499 insertions(+), 270 deletions(-) create mode 100644 src/Language/Lexer/Buffer/Buffer.php create mode 100644 src/Language/Lexer/Rule/RuleInterface.php create mode 100644 src/Language/Lexer/Scanner/Scanner.php create mode 100644 src/Language/Lexer/Scanner/ScannerException.php create mode 100644 src/Language/Lexer/Scanner/ScannerInterface.php create mode 100644 src/Language/Lexer/Scanner/ScannerState.php create mode 100644 test/Unit/Language/Lexer/Buffer/BufferTest.php create mode 100644 test/Unit/Language/Lexer/Matcher/MatcherFixtures.php create mode 100644 test/Unit/Language/Lexer/Rule/RuleFixtures.php create mode 100644 test/Unit/Language/Lexer/Scanner/ScannerTest.php diff --git 
a/src/Language/Lexer/Buffer/Buffer.php b/src/Language/Lexer/Buffer/Buffer.php new file mode 100644 index 0000000..55d4092 --- /dev/null +++ b/src/Language/Lexer/Buffer/Buffer.php @@ -0,0 +1,112 @@ +. + */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\Buffer; + +use PackageFactory\ComponentEngine\Parser\Source\Position; +use PackageFactory\ComponentEngine\Parser\Source\Range; + +final class Buffer +{ + private Position $start; + private int $endLineNumber; + private int $nextEndLineNumber; + private int $endColumnNumber; + private int $nextEndColumnNumber; + private string $contents; + + public function __construct() + { + $this->start = Position::zero(); + $this->endLineNumber = 0; + $this->nextEndLineNumber = 0; + $this->endColumnNumber = 0; + $this->nextEndColumnNumber = 0; + $this->contents = ''; + } + + public function getStart(): Position + { + return $this->start; + } + + public function getEnd(): Position + { + return Position::from($this->endLineNumber, $this->endColumnNumber); + } + + public function getRange(): Range + { + return Range::from($this->getStart(), $this->getEnd()); + } + + public function getContents(): string + { + return $this->contents; + } + + public function append(?string $character): void + { + if ($character === null) { + return; + } + + $this->contents .= $character; + + $this->endLineNumber = $this->nextEndLineNumber; + $this->endColumnNumber = $this->nextEndColumnNumber; + + if ($character === "\n") { + $this->nextEndLineNumber++; + $this->nextEndColumnNumber = 0; + } else { + $this->nextEndColumnNumber++; + } + } + + public function flush(): void + { + $this->start = Position::from( + $this->endLineNumber = $this->nextEndLineNumber, + $this->endColumnNumber = $this->nextEndColumnNumber + ); + + $this->contents = ''; + } + + public function overwrite(Buffer $other): void + { + $other->start = $this->start; + $other->endLineNumber = $this->endLineNumber; + $other->nextEndLineNumber = 
$this->nextEndLineNumber; + $other->endColumnNumber = $this->endColumnNumber; + $other->nextEndColumnNumber = $this->nextEndColumnNumber; + $other->contents = $this->contents; + } + + public function reset(): void + { + $this->endLineNumber = $this->nextEndLineNumber = $this->start->lineNumber; + $this->endColumnNumber = $this->nextEndColumnNumber = $this->start->columnNumber; + $this->contents = ''; + } +} diff --git a/src/Language/Lexer/CharacterStream/CharacterStream.php b/src/Language/Lexer/CharacterStream/CharacterStream.php index cf13037..99af817 100644 --- a/src/Language/Lexer/CharacterStream/CharacterStream.php +++ b/src/Language/Lexer/CharacterStream/CharacterStream.php @@ -22,28 +22,25 @@ namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream; -use PackageFactory\ComponentEngine\Parser\Source\Position; - /** * @internal */ final class CharacterStream { private int $byte; - private Cursor $cursor; - private ?string $characterUnderCursor = null; + private ?string $characterUnderCursor = ''; public function __construct(private readonly string $source) { $this->byte = 0; - $this->cursor = new Cursor(); - $this->next(); } public function next(): void { - $this->cursor->advance($this->characterUnderCursor); + if ($this->characterUnderCursor === null) { + return; + } $nextCharacter = $this->source[$this->byte++] ?? null; if ($nextCharacter === null) { @@ -53,13 +50,13 @@ public function next(): void $ord = ord($nextCharacter); if ($ord >= 0x80) { - $nextCharacter .= $this->source[$this->byte++]; + $nextCharacter .= $this->source[$this->byte++] ?? ''; } if ($ord >= 0xe0) { - $nextCharacter .= $this->source[$this->byte++]; + $nextCharacter .= $this->source[$this->byte++] ?? ''; } if ($ord >= 0xf0) { - $nextCharacter .= $this->source[$this->byte++]; + $nextCharacter .= $this->source[$this->byte++] ?? 
''; } $this->characterUnderCursor = $nextCharacter; @@ -75,30 +72,10 @@ public function isEnd(): bool return $this->characterUnderCursor === null; } - public function getCurrentPosition(): Position - { - return $this->cursor->getCurrentPosition(); - } - - public function getPreviousPosition(): Position - { - return $this->cursor->getPreviousPosition(); - } - - public function makeSnapshot(): CharacterStreamSnapshot - { - return new CharacterStreamSnapshot( - byte: $this->byte, - cursor: $this->cursor->makeSnapshot(), - characterUnderCursor: $this->characterUnderCursor - ); - } - - public function restoreSnapshot(CharacterStreamSnapshot $snapshot): void + public function overwrite(CharacterStream $other): void { - $this->byte = $snapshot->byte; - $this->cursor->restoreSnapshot($snapshot->cursor); - $this->characterUnderCursor = $snapshot->characterUnderCursor; + $other->byte = $this->byte; + $other->characterUnderCursor = $this->characterUnderCursor; } public function getRest(): string diff --git a/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php b/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php index e101b5f..4b2c364 100644 --- a/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php +++ b/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php @@ -29,7 +29,6 @@ final class CharacterStreamSnapshot { public function __construct( public readonly int $byte, - public readonly CursorSnapshot $cursor, public readonly ?string $characterUnderCursor = null ) { } diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index 1af7e97..bdc5d7d 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -22,333 +22,228 @@ namespace PackageFactory\ComponentEngine\Language\Lexer; -use LogicException; -use PackageFactory\ComponentEngine\Language\Lexer\CharacterStream\CharacterStream; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; -use 
PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Token; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\RuleInterface; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; +use PackageFactory\ComponentEngine\Language\Lexer\Scanner\Scanner; +use PackageFactory\ComponentEngine\Language\Lexer\Scanner\ScannerException; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; final class Lexer { - private readonly Rules $TOKEN_TYPES_SPACE; - private readonly Rules $TOKEN_TYPES_SPACE_AND_COMMENTS; + private static Rules $TOKEN_TYPES_SPACE; + private static Rules $TOKEN_TYPES_SPACE_AND_COMMENTS; - private readonly CharacterStream $characterStream; - private Position $startPosition; - private int $offset = 0; - private string $buffer = ''; - private ?Rule $tokenTypeUnderCursor = null; + private readonly Scanner $scanner; + private ?Rule $ruleUnderCursor = null; public function __construct(string $source) { - $this->TOKEN_TYPES_SPACE = Rules::from( + self::$TOKEN_TYPES_SPACE = Rules::from( Rule::SPACE, Rule::END_OF_LINE ); - $this->TOKEN_TYPES_SPACE_AND_COMMENTS = Rules::from( + self::$TOKEN_TYPES_SPACE_AND_COMMENTS = Rules::from( Rule::SPACE, Rule::END_OF_LINE, Rule::COMMENT ); - $this->characterStream = new CharacterStream($source); - $this->startPosition = Position::zero(); + $this->scanner = new Scanner($source); } public function getRuleUnderCursor(): Rule { - assert($this->tokenTypeUnderCursor !== null); + assert($this->ruleUnderCursor !== null); - return $this->tokenTypeUnderCursor; + return $this->ruleUnderCursor; } public function getBuffer(): string { - return $this->buffer; + return $this->scanner->getBuffer()->getContents(); } public function isEnd(): bool { - return $this->characterStream->isEnd(); + return $this->scanner->isEnd(); } public function 
assertIsEnd(): void { - if (!$this->isEnd()) { - throw LexerException::becauseOfUnexpectedExceedingSource( - affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange(), - exceedingCharacter: $this->characterStream->current() ?? '' - ); + try { + $this->scanner->assertIsEnd(); + } catch (ScannerException $e) { + throw LexerException::becauseOfScannerException($e); } } public function getStartPosition(): Position { - return $this->startPosition; + return $this->scanner->getBuffer()->getStart(); } public function getEndPosition(): Position { - return $this->characterStream->getPreviousPosition(); + return $this->scanner->getBuffer()->getEnd(); } public function getCursorRange(): Range { - return $this->getStartPosition()->toRange($this->getEndPosition()); + return $this->scanner->getBuffer()->getRange(); } - public function read(Rule $tokenType): void + public function read(Rule $rule): void { + if ($this->scanner->scan($rule)) { + $this->scanner->commit(); + $this->ruleUnderCursor = $rule; + return; + } - if ($this->characterStream->isEnd()) { + if ($this->scanner->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: Rules::from($tokenType), - affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() + expectedRules: Rules::from($rule), + affectedRangeInSource: $this->scanner->getBuffer()->getRange() ); } - if ($this->extract($tokenType)) { - $this->tokenTypeUnderCursor = $tokenType; - return; - } - throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: Rules::from($tokenType), - affectedRangeInSource: Range::from( - $this->startPosition, - $this->characterStream->getCurrentPosition() - ), - actualCharacterSequence: $this->buffer . 
$this->characterStream->current() + expectedRules: Rules::from($rule), + affectedRangeInSource: $this->scanner->getBuffer()->getRange(), + actualCharacterSequence: $this->scanner->getBuffer()->getContents() ); } - public function readOneOf(Rules $tokenTypes): void + public function readOneOf(Rules $rules): void { - - if ($this->characterStream->isEnd()) { - throw LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: $tokenTypes, - affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() - ); + if ($rule = $this->scanner->scanOneOf(...$rules->items)) { + $this->scanner->commit(); + assert($rule instanceof Rule); + $this->ruleUnderCursor = $rule; + return; } - $foundRule = $this->extractOneOf($tokenTypes); - if ($foundRule === null) { - throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: $tokenTypes, - affectedRangeInSource: Range::from( - $this->startPosition, - $this->characterStream->getPreviousPosition() - ), - actualCharacterSequence: $this->buffer + if ($this->scanner->isEnd()) { + throw LexerException::becauseOfUnexpectedEndOfSource( + expectedRules: $rules, + affectedRangeInSource: $this->scanner->getBuffer()->getRange() ); } - $this->tokenTypeUnderCursor = $foundRule; + throw LexerException::becauseOfUnexpectedCharacterSequence( + expectedRules: $rules, + affectedRangeInSource: $this->scanner->getBuffer()->getRange(), + actualCharacterSequence: $this->scanner->getBuffer()->getContents() + ); } - public function probe(Rule $tokenType): bool + public function probe(Rule $rule): bool { - - if ($this->characterStream->isEnd()) { - return false; - } - - $snapshot = $this->characterStream->makeSnapshot(); - - if ($tokenType = $this->extract($tokenType)) { - $this->tokenTypeUnderCursor = $tokenType; + if ($this->scanner->scan($rule)) { + $this->scanner->commit(); + $this->ruleUnderCursor = $rule; return true; } - $this->characterStream->restoreSnapshot($snapshot); + $this->scanner->dismiss(); return false; } 
- public function probeOneOf(Rules $tokenTypes): bool + public function probeOneOf(Rules $rules): ?RuleInterface { - if ($this->characterStream->isEnd()) { - return false; - } - - $snapshot = $this->characterStream->makeSnapshot(); - - if ($tokenType = $this->extractOneOf($tokenTypes)) { - $this->tokenTypeUnderCursor = $tokenType; - return true; + if ($rule = $this->scanner->scanOneOf(...$rules->items)) { + $this->scanner->commit(); + assert($rule instanceof Rule); + $this->ruleUnderCursor = $rule; + return $rule; } - $this->characterStream->restoreSnapshot($snapshot); - return false; + $this->scanner->dismiss(); + return null; } - public function peek(Rule $tokenType): bool + public function peek(Rule $rule): bool { - if ($this->characterStream->isEnd()) { - return false; - } - - $snapshot = $this->characterStream->makeSnapshot(); - $result = $this->extract($tokenType) !== null; - $this->characterStream->restoreSnapshot($snapshot); + $result = $this->scanner->scan($rule); + $this->scanner->dismiss(); return $result; } - public function peekOneOf(Rules $tokenTypes): ?Rule + public function peekOneOf(Rules $rules): ?RuleInterface { - if ($this->characterStream->isEnd()) { - return null; - } - - $snapshot = $this->characterStream->makeSnapshot(); - $foundRule = $this->extractOneOf($tokenTypes); - $this->characterStream->restoreSnapshot($snapshot); + $rule = $this->scanner->scanOneOf(...$rules->items); + $this->scanner->dismiss(); - return $foundRule; + return $rule; } - public function expect(Rule $tokenType): void + public function expect(Rule $rule): void { - if ($this->characterStream->isEnd()) { + if ($this->scanner->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: Rules::from($tokenType), - affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() + expectedRules: Rules::from($rule), + affectedRangeInSource: $this->scanner->getBuffer()->getRange() ); } - $snapshot = $this->characterStream->makeSnapshot(); - 
if ($this->extract($tokenType) === null) { + if (!$this->scanner->scan($rule)) { throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: Rules::from($tokenType), - affectedRangeInSource: Range::from( - $this->startPosition, - $this->characterStream->getPreviousPosition() - ), - actualCharacterSequence: $this->buffer + expectedRules: Rules::from($rule), + affectedRangeInSource: $this->scanner->getBuffer()->getRange(), + actualCharacterSequence: $this->scanner->getBuffer()->getContents() ); } - $this->characterStream->restoreSnapshot($snapshot); + $this->scanner->dismiss(); } - public function expectOneOf(Rules $tokenTypes): Rule + public function expectOneOf(Rules $rules): RuleInterface { - if ($this->characterStream->isEnd()) { + if ($this->scanner->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: $tokenTypes, - affectedRangeInSource: $this->characterStream->getCurrentPosition()->toRange() + expectedRules: $rules, + affectedRangeInSource: $this->scanner->getBuffer()->getRange() ); } - $snapshot = $this->characterStream->makeSnapshot(); - $foundRule = $this->extractOneOf($tokenTypes); - if ($foundRule === null) { - throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: $tokenTypes, - affectedRangeInSource: Range::from( - $this->startPosition, - $this->characterStream->getPreviousPosition() - ), - actualCharacterSequence: $this->buffer - ); + if ($rule = $this->scanner->scanOneOf(...$rules->items)) { + $this->scanner->dismiss(); + return $rule; } - $this->characterStream->restoreSnapshot($snapshot); - - return $foundRule; + throw LexerException::becauseOfUnexpectedCharacterSequence( + expectedRules: $rules, + affectedRangeInSource: $this->scanner->getBuffer()->getRange(), + actualCharacterSequence: $this->scanner->getBuffer()->getContents() + ); } public function skipSpace(): void { - $this->skipAnyOf($this->TOKEN_TYPES_SPACE); - } - - public function skipSpaceAndComments(): void - { - 
$this->skipAnyOf($this->TOKEN_TYPES_SPACE_AND_COMMENTS); - } - - private function skipAnyOf(Rules $tokenTypes): void - { - while (true) { - $character = $this->characterStream->current(); - - foreach ($tokenTypes->items as $tokenType) { - $matcher = Matcher::for($tokenType); - - if ($matcher->match($character, 0) === Result::KEEP) { - $this->read($tokenType); - continue 2; - } - } - - break; + while ($this->scanner->scanOneOf(...self::$TOKEN_TYPES_SPACE->items)) { + $this->scanner->commit(); } - } - - private function extract(Rule $tokenType): ?Rule - { - $this->startPosition = $this->characterStream->getCurrentPosition(); - $this->offset = 0; - $this->buffer = ''; - - while (true) { - $character = $this->characterStream->current(); - $result = Matcher::for($tokenType)->match($character, $this->offset); - - if ($result === Result::SATISFIED) { - return $tokenType; - } - - if ($result === Result::CANCEL) { - return null; - } - $this->offset++; - $this->buffer .= $character; - $this->characterStream->next(); + if ($this->scanner->isEnd()) { + $this->scanner->commit(); + } else { + $this->scanner->dismiss(); } } - private function extractOneOf(Rules $tokenTypes): ?Rule + public function skipSpaceAndComments(): void { - $this->startPosition = $this->characterStream->getCurrentPosition(); - $this->offset = 0; - $this->buffer = ''; - - $tokenTypeCandidates = $tokenTypes->items; - while (count($tokenTypeCandidates)) { - $character = $this->characterStream->current(); - - $nextRuleCandidates = []; - foreach ($tokenTypeCandidates as $tokenType) { - $result = Matcher::for($tokenType)->match($character, $this->offset); - - if ($result === Result::SATISFIED) { - return $tokenType; - } - - if ($result === Result::KEEP) { - $nextRuleCandidates[] = $tokenType; - } - } - - $this->offset++; - $this->buffer .= $character; - $tokenTypeCandidates = $nextRuleCandidates; - $this->characterStream->next(); + while 
($this->scanner->scanOneOf(...self::$TOKEN_TYPES_SPACE_AND_COMMENTS->items)) { + $this->scanner->commit(); } - return null; - } - - public function dumpRest(): string - { - return $this->characterStream->getRest(); + if ($this->scanner->isEnd()) { + $this->scanner->commit(); + } else { + $this->scanner->dismiss(); + } } } diff --git a/src/Language/Lexer/LexerException.php b/src/Language/Lexer/LexerException.php index 61f5a68..aa2785a 100644 --- a/src/Language/Lexer/LexerException.php +++ b/src/Language/Lexer/LexerException.php @@ -22,16 +22,19 @@ namespace PackageFactory\ComponentEngine\Language\Lexer; +use Exception; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; +use PackageFactory\ComponentEngine\Language\Lexer\Scanner\ScannerException; use PackageFactory\ComponentEngine\Language\Util\DebugHelper; use PackageFactory\ComponentEngine\Parser\Source\Range; -final class LexerException extends \Exception +final class LexerException extends Exception { private function __construct( int $code, string $message, - public readonly Range $affectedRangeInSource + public readonly Range $affectedRangeInSource, + ?Exception $cause = null ) { $message = sprintf( '[%s:%s] %s', @@ -40,7 +43,7 @@ private function __construct( $message ); - parent::__construct($message, $code); + parent::__construct($message, $code, $cause); } public static function becauseOfUnexpectedEndOfSource( @@ -73,17 +76,13 @@ public static function becauseOfUnexpectedCharacterSequence( ); } - public static function becauseOfUnexpectedExceedingSource( - Range $affectedRangeInSource, - string $exceedingCharacter - ): self { + public static function becauseOfScannerException(ScannerException $cause): self + { return new self( - code: 1691675396, - message: sprintf( - 'Expected source to end, but found exceeding character "%s".', - $exceedingCharacter - ), - affectedRangeInSource: $affectedRangeInSource + code: 1692274173, + message: $cause->getMessage(), + affectedRangeInSource: 
$cause->affectedRangeInSource, + cause: $cause ); } } diff --git a/src/Language/Lexer/Rule/Rule.php b/src/Language/Lexer/Rule/Rule.php index 1f67a0b..87ee387 100644 --- a/src/Language/Lexer/Rule/Rule.php +++ b/src/Language/Lexer/Rule/Rule.php @@ -22,7 +22,9 @@ namespace PackageFactory\ComponentEngine\Language\Lexer\Rule; -enum Rule: string +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; + +enum Rule: string implements RuleInterface { case COMMENT = 'COMMENT'; @@ -91,4 +93,9 @@ enum Rule: string case SPACE = 'SPACE'; case END_OF_LINE = 'END_OF_LINE'; + + public function getMatcher(): Matcher + { + return Matcher::for($this); + } } diff --git a/src/Language/Lexer/Rule/RuleInterface.php b/src/Language/Lexer/Rule/RuleInterface.php new file mode 100644 index 0000000..3da73c7 --- /dev/null +++ b/src/Language/Lexer/Rule/RuleInterface.php @@ -0,0 +1,30 @@ +. + */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\Rule; + +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; + +interface RuleInterface +{ + public function getMatcher(): Matcher; +} diff --git a/src/Language/Lexer/Scanner/Scanner.php b/src/Language/Lexer/Scanner/Scanner.php new file mode 100644 index 0000000..b240461 --- /dev/null +++ b/src/Language/Lexer/Scanner/Scanner.php @@ -0,0 +1,155 @@ +. 
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\Scanner; + +use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; +use PackageFactory\ComponentEngine\Language\Lexer\CharacterStream\CharacterStream; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\RuleInterface; + +final class Scanner implements ScannerInterface +{ + private readonly ScannerState $main; + private readonly ScannerState $branch; + + private bool $isHalted; + private int $offset; + + public function __construct(string $source) + { + $this->main = new ScannerState( + characterStream: new CharacterStream($source), + buffer: new Buffer() + ); + $this->branch = new ScannerState( + characterStream: new CharacterStream($source), + buffer: new Buffer() + ); + + $this->isHalted = false; + $this->offset = 0; + } + + public function getBuffer(): Buffer + { + return $this->branch->buffer; + } + + public function isEnd(): bool + { + return $this->branch->characterStream->isEnd(); + } + + public function assertIsEnd(): void + { + if (!$this->branch->characterStream->isEnd()) { + $this->branch->buffer->flush(); + $this->branch->buffer->append($this->branch->characterStream->current()); + $this->isHalted = true; + + throw ScannerException::becauseOfUnexpectedExceedingSource( + affectedRangeInSource: $this->branch->buffer->getRange(), + exceedingCharacter: $this->branch->buffer->getContents() + ); + } + } + + public function scan(RuleInterface $rule): bool + { + assert(!$this->isHalted); + + $this->branch->buffer->flush(); + $this->offset = 0; + + $matcher = $rule->getMatcher(); + while (true) { + $character = $this->branch->characterStream->current(); + $result = $matcher->match($character, $this->offset); + + if ($result === Result::SATISFIED) { + $this->isHalted = true; + return true; + } + + if ($result === Result::CANCEL) { + $this->branch->buffer->append($character); + $this->isHalted 
= true; + return false; + } + + $this->offset++; + $this->branch->buffer->append($character); + $this->branch->characterStream->next(); + } + } + + public function scanOneOf(RuleInterface ...$rules): ?RuleInterface + { + assert(!$this->isHalted); + + $this->branch->buffer->flush(); + $this->offset = 0; + + $candidates = $rules; + while ($candidates) { + $character = $this->branch->characterStream->current(); + + $nextCandidates = []; + foreach ($candidates as $candidate) { + $matcher = $candidate->getMatcher(); + $result = $matcher->match($character, $this->offset); + + if ($result === Result::SATISFIED) { + $this->isHalted = true; + return $candidate; + } + + if ($result === Result::KEEP) { + $nextCandidates[] = $candidate; + } + } + + if ($candidates = $nextCandidates) { + $this->offset++; + $this->branch->characterStream->next(); + } + + $this->branch->buffer->append($character); + } + + $this->isHalted = true; + return null; + } + + public function commit(): void + { + $this->branch->overwrite($this->main); + $this->isHalted = false; + } + + public function dismiss(): void + { + $this->main->overwrite($this->branch); + $this->isHalted = false; + } +} diff --git a/src/Language/Lexer/Scanner/ScannerException.php b/src/Language/Lexer/Scanner/ScannerException.php new file mode 100644 index 0000000..013e567 --- /dev/null +++ b/src/Language/Lexer/Scanner/ScannerException.php @@ -0,0 +1,58 @@ +. 
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\Scanner; + +use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; +use PackageFactory\ComponentEngine\Parser\Source\Range; + +final class ScannerException extends \Exception +{ + private function __construct( + int $code, + string $message, + public readonly Range $affectedRangeInSource + ) { + $message = sprintf( + '[%s:%s] %s', + $affectedRangeInSource->start->lineNumber, + $affectedRangeInSource->start->columnNumber, + $message + ); + + parent::__construct($message, $code); + } + + public static function becauseOfUnexpectedExceedingSource( + Range $affectedRangeInSource, + string $exceedingCharacter + ): self { + return new self( + code: 1691675396, + message: sprintf( + 'Expected source to end, but found exceeding character "%s".', + $exceedingCharacter + ), + affectedRangeInSource: $affectedRangeInSource + ); + } +} diff --git a/src/Language/Lexer/Scanner/ScannerInterface.php b/src/Language/Lexer/Scanner/ScannerInterface.php new file mode 100644 index 0000000..bb4dc49 --- /dev/null +++ b/src/Language/Lexer/Scanner/ScannerInterface.php @@ -0,0 +1,37 @@ +. + */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\Scanner; + +use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\RuleInterface; + +interface ScannerInterface +{ + public function getBuffer(): Buffer; + public function isEnd(): bool; + + public function scan(RuleInterface $rule): bool; + public function scanOneOf(RuleInterface ...$rules): ?RuleInterface; + public function commit(): void; + public function dismiss(): void; +} diff --git a/src/Language/Lexer/Scanner/ScannerState.php b/src/Language/Lexer/Scanner/ScannerState.php new file mode 100644 index 0000000..37c78dc --- /dev/null +++ b/src/Language/Lexer/Scanner/ScannerState.php @@ -0,0 +1,41 @@ +. 
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\Scanner; + +use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; +use PackageFactory\ComponentEngine\Language\Lexer\CharacterStream\CharacterStream; + +final class ScannerState +{ + public function __construct( + public readonly CharacterStream $characterStream, + public readonly Buffer $buffer + ) { + } + + public function overwrite(ScannerState $other): void + { + $this->characterStream->overwrite($other->characterStream); + $this->buffer->overwrite($other->buffer); + } +} diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index da4d53e..3d239fe 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -129,6 +129,7 @@ public function parse(Lexer $lexer): ExpressionNode } if ($tokenType = $lexer->peekOneOf(self::$TOKEN_TYPES_BINARY_OPERATORS)) { + assert($tokenType instanceof Rule); if ($this->precedence->mustStopAt($tokenType)) { return $result; } @@ -182,9 +183,10 @@ private function parseUnaryStatement(Lexer $lexer): ExpressionNode private function parseUnaryOperation(Lexer $lexer): ExpressionNode { + $operator = $this->parseUnaryOperator($lexer); $start = $lexer->getStartPosition(); + $lexer->skipSpaceAndComments(); - $operator = $this->parseUnaryOperator($lexer); $operand = $this->parseUnaryStatement($lexer); $unaryOperationNode = new UnaryOperationNode( @@ -205,12 +207,7 @@ private function parseUnaryOperation(Lexer $lexer): ExpressionNode private function parseUnaryOperator(Lexer $lexer): UnaryOperator { $lexer->read(Rule::SYMBOL_EXCLAMATIONMARK); - - $unaryOperator = UnaryOperator::NOT; - - $lexer->skipSpaceAndComments(); - - return $unaryOperator; + return UnaryOperator::NOT; } private function withPrecedence(Precedence $precedence): self @@ -372,7 +369,7 @@ private function parseAccessType(Lexer $lexer): AccessType 
return match ($lexer->getRuleUnderCursor()) { Rule::SYMBOL_PERIOD => AccessType::MANDATORY, Rule::SYMBOL_OPTCHAIN => AccessType::OPTIONAL, - default => throw new LogicException() + default => throw new LogicException($lexer->getRuleUnderCursor()->name) }; } diff --git a/src/Language/Parser/Module/ModuleParser.php b/src/Language/Parser/Module/ModuleParser.php index 7ed9205..a08601c 100644 --- a/src/Language/Parser/Module/ModuleParser.php +++ b/src/Language/Parser/Module/ModuleParser.php @@ -91,7 +91,6 @@ private function parseExport(Lexer $lexer): ExportNode $this->exportParser ??= ExportParser::singleton(); $export = $this->exportParser->parse($lexer); - $lexer->skipSpaceAndComments(); return $export; } diff --git a/src/Language/Parser/TypeReference/TypeReferenceParser.php b/src/Language/Parser/TypeReference/TypeReferenceParser.php index 38d91eb..99001a6 100644 --- a/src/Language/Parser/TypeReference/TypeReferenceParser.php +++ b/src/Language/Parser/TypeReference/TypeReferenceParser.php @@ -43,12 +43,15 @@ final class TypeReferenceParser public function parse(Lexer $lexer): TypeReferenceNode { $this->start = null; - $isOptional = $lexer->probe(Rule::SYMBOL_QUESTIONMARK); - $this->start = $lexer->getStartPosition(); + if ($isOptional = $lexer->probe(Rule::SYMBOL_QUESTIONMARK)) { + $this->start = $lexer->getStartPosition(); + } $typeNameNodes = $this->parseTypeNames($lexer); $isArray = $this->parseIsArray($lexer); $end = $lexer->getEndPosition(); + assert($this->start !== null); + try { return new TypeReferenceNode( rangeInSource: Range::from($this->start, $end), diff --git a/test/Unit/Language/Lexer/Buffer/BufferTest.php b/test/Unit/Language/Lexer/Buffer/BufferTest.php new file mode 100644 index 0000000..2a2c818 --- /dev/null +++ b/test/Unit/Language/Lexer/Buffer/BufferTest.php @@ -0,0 +1,255 @@ +. 
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Buffer; + +use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; +use PackageFactory\ComponentEngine\Parser\Source\Position; +use PackageFactory\ComponentEngine\Parser\Source\Range; +use PHPUnit\Framework\TestCase; + +final class BufferTest extends TestCase +{ + /** + * @test + */ + public function testInitialBufferState(): void + { + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::zero(), + expectedContents: '', + actualBuffer: new Buffer() + ); + } + + /** + * @test + */ + public function appendCapturesTheGivenCharacterAndIncrementsTheColumnNumberOfTheEndPosition(): void + { + $buffer = new Buffer(); + $buffer->append('A'); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::zero(), + expectedContents: 'A', + actualBuffer: $buffer + ); + + $buffer->append('B'); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::from(0, 1), + expectedContents: 'AB', + actualBuffer: $buffer + ); + + $buffer->append('C'); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::from(0, 2), + expectedContents: 'ABC', + actualBuffer: $buffer + ); + } + + /** + * @test + */ + public function appendAcceptsMultiByteCharactersAndCountsThemAsOneCharacterEach(): void + { + $buffer = new Buffer(); + $buffer->append('🌵'); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::zero(), + expectedContents: '🌵', + actualBuffer: $buffer + ); + + $buffer->append('🆚'); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::from(0, 1), + expectedContents: '🌵🆚', + actualBuffer: $buffer + ); + + $buffer->append('⌚️'); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::from(0, 2), + expectedContents: '🌵🆚⌚️', + actualBuffer: $buffer + ); + } 
+ + /** + * @test + */ + public function appendCapturesNewLineCharacterIncrementingTheLineNumberOfTheEndPosition(): void + { + $buffer = new Buffer(); + $buffer->append('A'); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::zero(), + expectedContents: 'A', + actualBuffer: $buffer + ); + + $buffer->append("\n"); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::from(0, 1), + expectedContents: "A\n", + actualBuffer: $buffer + ); + + $buffer->append('B'); + + $this->assertBufferState( + expectedStart: Position::zero(), + expectedEnd: Position::from(1, 0), + expectedContents: "A\nB", + actualBuffer: $buffer + ); + } + + /** + * @test + */ + public function flushEmptiesTheContentsAndSetsNewBoundingPositions(): void + { + $buffer = new Buffer(); + $buffer->append('A'); + $buffer->append('B'); + $buffer->append('C'); + + $buffer->flush(); + + $this->assertBufferState( + expectedStart: Position::from(0, 3), + expectedEnd: Position::from(0, 3), + expectedContents: '', + actualBuffer: $buffer + ); + + $buffer = new Buffer(); + $buffer->append('A'); + $buffer->append("\n"); + $buffer->append('C'); + + $buffer->flush(); + + $this->assertBufferState( + expectedStart: Position::from(1, 1), + expectedEnd: Position::from(1, 1), + expectedContents: '', + actualBuffer: $buffer + ); + } + + /** + * @test + */ + public function resetEmptiesTheContentsAndRestoresBoundingPositions(): void + { + $buffer = new Buffer(); + $buffer->append('A'); + $buffer->append('B'); + $buffer->append('C'); + + $buffer->reset(); + + $this->assertBufferState( + expectedStart: Position::from(0, 0), + expectedEnd: Position::from(0, 0), + expectedContents: '', + actualBuffer: $buffer + ); + + $buffer = new Buffer(); + $buffer->append('A'); + $buffer->append('B'); + + $buffer->flush(); + + $buffer->append('C'); + $buffer->append('D'); + + $buffer->reset(); + + $buffer->append('E'); + $buffer->append('F'); + + 
$this->assertBufferState( + expectedStart: Position::from(0, 2), + expectedEnd: Position::from(0, 3), + expectedContents: 'EF', + actualBuffer: $buffer + ); + } + + public static function assertBufferState( + Position $expectedStart, + Position $expectedEnd, + string $expectedContents, + Buffer $actualBuffer, + string $message = '' + ): void { + $prefix = $message ? $message . ': ' : ''; + + self::assertEquals( + $expectedStart, + $actualBuffer->getStart(), + $prefix . 'Start position of buffer is incorrect.' + ); + + self::assertEquals( + $expectedEnd, + $actualBuffer->getEnd(), + $prefix . 'End position of buffer is incorrect.' + ); + + self::assertEquals( + Range::from($expectedStart, $expectedEnd), + $actualBuffer->getRange(), + $prefix . 'Range of buffer is incorrect.' + ); + + self::assertEquals( + $expectedContents, + $actualBuffer->getContents(), + $prefix . 'Contents of buffer are incorrect.' + ); + } +} diff --git a/test/Unit/Language/Lexer/Matcher/MatcherFixtures.php b/test/Unit/Language/Lexer/Matcher/MatcherFixtures.php new file mode 100644 index 0000000..a0090f2 --- /dev/null +++ b/test/Unit/Language/Lexer/Matcher/MatcherFixtures.php @@ -0,0 +1,93 @@ +. 
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Matcher; + +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; + +final class MatcherFixtures +{ + public static function everything(): Matcher + { + return new class extends Matcher + { + public function match(?string $character, int $offset): Result + { + if ($character === null) { + return Result::SATISFIED; + } + + return Result::KEEP; + } + }; + } + + public static function nothing(): Matcher + { + return new class extends Matcher + { + public function match(?string $character, int $offset): Result + { + return Result::CANCEL; + } + }; + } + + public static function satisfiedAtOffset(int $matchingOffset): Matcher + { + return new class($matchingOffset) extends Matcher + { + public function __construct(private readonly int $matchingOffset) + { + } + + public function match(?string $character, int $offset): Result + { + if ($offset === $this->matchingOffset) { + return Result::SATISFIED; + } + + return Result::KEEP; + } + }; + } + + public static function cancelAtOffset(int $matchingOffset): Matcher + { + return new class($matchingOffset) extends Matcher + { + public function __construct(private readonly int $matchingOffset) + { + } + + public function match(?string $character, int $offset): Result + { + if ($offset === $this->matchingOffset) { + return Result::CANCEL; + } + + return Result::KEEP; + } + }; + } +} diff --git a/test/Unit/Language/Lexer/Rule/RuleFixtures.php b/test/Unit/Language/Lexer/Rule/RuleFixtures.php new file mode 100644 index 0000000..c6168e7 --- /dev/null +++ b/test/Unit/Language/Lexer/Rule/RuleFixtures.php @@ -0,0 +1,44 @@ +. 
+ */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Rule; + +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\RuleInterface; + +final class RuleFixtures +{ + public static function withMatcher(Matcher $matcher): RuleInterface + { + return new class($matcher) implements RuleInterface + { + public function __construct(private readonly Matcher $matcher) + { + } + + public function getMatcher(): Matcher + { + return $this->matcher; + } + }; + } +} diff --git a/test/Unit/Language/Lexer/Scanner/ScannerTest.php b/test/Unit/Language/Lexer/Scanner/ScannerTest.php new file mode 100644 index 0000000..59d722a --- /dev/null +++ b/test/Unit/Language/Lexer/Scanner/ScannerTest.php @@ -0,0 +1,525 @@ +. + */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Scanner; + +use AssertionError; +use PackageFactory\ComponentEngine\Language\Lexer\Scanner\Scanner; +use PackageFactory\ComponentEngine\Language\Lexer\Scanner\ScannerInterface; +use PackageFactory\ComponentEngine\Parser\Source\Position; +use PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Buffer\BufferTest; +use PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Matcher\MatcherFixtures; +use PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Rule\RuleFixtures; +use PHPUnit\Framework\TestCase; + +final class ScannerTest extends TestCase +{ + /** + * @test + */ + public function testInitialScannerStateWhenSourceIsEmpty(): void + { + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 0), + expectedBufferContents: '', + expectedIsEnd: true, + actualScanner: new Scanner(''), + ); + } + + /** + * @test + */ + public function testInitialScannerStateWhenSourceIsNotEmpty(): void + { + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 
0), + expectedBufferContents: '', + expectedIsEnd: false, + actualScanner: new Scanner('A'), + ); + } + + /** + * @test + */ + public function scanReturnsTrueAndCapturesMatchingCharactersIfGivenRuleMatches(): void + { + $scanner = new Scanner('ABC'); + $rule = RuleFixtures::withMatcher( + MatcherFixtures::everything() + ); + + $this->assertTrue($scanner->scan($rule)); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 2), + expectedBufferContents: 'ABC', + expectedIsEnd: true, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function scanCapturesEveryCharacterUntilMatchWasFound(): void + { + $scanner = new Scanner('ABC'); + $rule = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(1) + ); + + $this->assertTrue($scanner->scan($rule)); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 0), + expectedBufferContents: 'A', + expectedIsEnd: false, + actualScanner: $scanner, + ); + + $scanner->commit(); + + $this->assertTrue($scanner->scan($rule)); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 1), + expectedBufferEnd: Position::from(0, 1), + expectedBufferContents: 'B', + expectedIsEnd: false, + actualScanner: $scanner, + ); + + $scanner->commit(); + + $this->assertTrue($scanner->scan($rule)); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 2), + expectedBufferEnd: Position::from(0, 2), + expectedBufferContents: 'C', + expectedIsEnd: true, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function scanReturnsFalseButCapturesAllMatchingCharactersUntilFailureIfGivenRuleDoesNotMatch(): void + { + $scanner = new Scanner('AABBCC'); + $rule = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(3) + ); + + $this->assertFalse($scanner->scan($rule)); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: 
Position::from(0, 3), + expectedBufferContents: 'AABB', + expectedIsEnd: false, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function scanCannotContinueOnceHalted(): void + { + $scanner = new Scanner('ABC'); + $notMatchingRule = RuleFixtures::withMatcher( + MatcherFixtures::nothing() + ); + + $scanner->scan($notMatchingRule); + + $this->expectException(AssertionError::class); + $scanner->scan($notMatchingRule); + } + + /** + * @test + */ + public function scanReturnsTrueAndCapturesMatchingCharactersIfGivenRuleDoesNotMatchButTheNextRuleDoes(): void + { + $scanner = new Scanner('ABC'); + $notMatchingRule = RuleFixtures::withMatcher( + MatcherFixtures::nothing() + ); + $matchingRule = RuleFixtures::withMatcher( + MatcherFixtures::everything() + ); + + $scanner->scan($notMatchingRule); + $scanner->dismiss(); + $scanner->scan($matchingRule); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 2), + expectedBufferContents: 'ABC', + expectedIsEnd: true, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function scanOneOfCapturesMatchingCharactersAndReturnsTheMatchingRuleIfAnyOfTheGivenRulesMatch(): void + { + $scanner = new Scanner('ABC'); + $notMatchingRule1 = RuleFixtures::withMatcher( + MatcherFixtures::nothing() + ); + $notMatchingRule2 = RuleFixtures::withMatcher( + MatcherFixtures::nothing() + ); + $matchingRule = RuleFixtures::withMatcher( + MatcherFixtures::everything() + ); + + $this->assertSame( + $matchingRule, + $scanner->scanOneOf($notMatchingRule1, $matchingRule, $notMatchingRule2) + ); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 2), + expectedBufferContents: 'ABC', + expectedIsEnd: true, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function scanOneOfReturnsNullButCapturesAllMatchingCharactersUntilFailureIfNoneOfTheGivenRulesMatch(): void + { + // + // Non-Match 
first + // + + $scanner = new Scanner('AABBCC'); + $notMatchingRule1 = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(2) + ); + $notMatchingRule2 = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(3) + ); + $notMatchingRule3 = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(4) + ); + + $this->assertNull( + $scanner->scanOneOf($notMatchingRule1, $notMatchingRule2, $notMatchingRule3) + ); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 4), + expectedBufferContents: 'AABBC', + expectedIsEnd: false, + actualScanner: $scanner, + ); + + // + // Match first + // + + $scanner = new Scanner('AAABBBCCC'); + $notMatchingRule1 = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(2) + ); + $notMatchingRule2 = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(2) + ); + $matchingRule = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(3) + ); + + $scanner->scanOneOf($notMatchingRule1, $notMatchingRule2, $matchingRule); + $scanner->commit(); + $scanner->scanOneOf($notMatchingRule1, $notMatchingRule2); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 3), + expectedBufferEnd: Position::from(0, 5), + expectedBufferContents: 'BBB', + expectedIsEnd: false, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function scanOneOfIfTwoCompetingRulesBothMatchAtTheSameOffsetTheFirstOneThatMatchesWins(): void + { + $scanner = new Scanner('ABC'); + $matchingRule1 = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(2) + ); + $matchingRule2 = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(2) + ); + $notMatchingRule = RuleFixtures::withMatcher( + MatcherFixtures::nothing() + ); + + $this->assertSame( + $matchingRule1, + $scanner->scanOneOf($matchingRule1, $matchingRule2, $notMatchingRule) + ); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: 
Position::from(0, 1), + expectedBufferContents: 'AB', + expectedIsEnd: false, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function scanOneOfIfTwoCompetingRulesBothMatchAtDifferentOffsetsTheFirstOneThatMatchesWins(): void + { + $scanner = new Scanner('ABC'); + $matchingRule1 = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(3) + ); + $matchingRule2 = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(2) + ); + $notMatchingRule = RuleFixtures::withMatcher( + MatcherFixtures::nothing() + ); + + $this->assertSame( + $matchingRule2, + $scanner->scanOneOf($matchingRule1, $matchingRule2, $notMatchingRule) + ); + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 1), + expectedBufferContents: 'AB', + expectedIsEnd: false, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function scanOneOfCannotContinueOnceScannerIsHalted(): void + { + $scanner = new Scanner('ABC'); + $rule1 = RuleFixtures::withMatcher( + MatcherFixtures::nothing() + ); + $rule2 = RuleFixtures::withMatcher( + MatcherFixtures::nothing() + ); + + $scanner->scanOneOf($rule1, $rule2); + + $this->expectException(AssertionError::class); + $scanner->scanOneOf($rule1, $rule2); + } + + /** + * @test + */ + public function dismissReturnsToLastPositionAfterScanMatch(): void + { + $scanner = new Scanner('AABBCC'); + $rule = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(3) + ); + + $scanner->scan($rule); + $scanner->commit(); + $scanner->scan($rule); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 3), + expectedBufferEnd: Position::from(0, 5), + expectedBufferContents: 'BCC', + expectedIsEnd: true, + actualScanner: $scanner, + ); + + $scanner->dismiss(); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 2), + expectedBufferContents: 'AAB', + expectedIsEnd: false, + actualScanner: 
$scanner, + ); + + $scanner->scan($rule); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 3), + expectedBufferEnd: Position::from(0, 5), + expectedBufferContents: 'BCC', + expectedIsEnd: true, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function dismissReturnsToLastPositionAfterScanMismatch(): void + { + $scanner = new Scanner('AAABBBCCC'); + $matchingRule = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(3) + ); + $notMatchingRule = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(2) + ); + + $scanner->scan($matchingRule); + $scanner->commit(); + $scanner->scan($notMatchingRule); + $scanner->dismiss(); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 2), + expectedBufferContents: 'AAA', + expectedIsEnd: false, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function backspaceReturnsToLastPositionAfterScanOneOfMatch(): void + { + $scanner = new Scanner('AABBCC'); + $rule1 = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(2) + ); + $rule2 = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(3) + ); + $rule3 = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(4) + ); + + $scanner->scanOneOf($rule1, $rule2, $rule3); + $scanner->commit(); + $scanner->scanOneOf($rule1, $rule2, $rule3); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 2), + expectedBufferEnd: Position::from(0, 3), + expectedBufferContents: 'BB', + expectedIsEnd: false, + actualScanner: $scanner, + ); + + $scanner->dismiss(); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 1), + expectedBufferContents: 'AA', + expectedIsEnd: false, + actualScanner: $scanner, + ); + } + + /** + * @test + */ + public function backspaceReturnsToLastPositionAfterScanOneOfMismatch(): void + { + $scanner = new Scanner('AAABBBCCC'); + 
$rule1 = RuleFixtures::withMatcher( + MatcherFixtures::satisfiedAtOffset(2) + ); + $rule2 = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(2) + ); + $rule3 = RuleFixtures::withMatcher( + MatcherFixtures::cancelAtOffset(3) + ); + + $scanner->scanOneOf($rule1, $rule2, $rule3); + $scanner->commit(); + $scanner->scanOneOf($rule2, $rule3); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 2), + expectedBufferEnd: Position::from(0, 5), + expectedBufferContents: 'ABBB', + expectedIsEnd: false, + actualScanner: $scanner, + ); + + $scanner->dismiss(); + + $this->assertScannerState( + expectedBufferStart: Position::from(0, 0), + expectedBufferEnd: Position::from(0, 1), + expectedBufferContents: 'AA', + expectedIsEnd: false, + actualScanner: $scanner, + ); + + } + + public static function assertScannerState( + Position $expectedBufferStart, + Position $expectedBufferEnd, + string $expectedBufferContents, + bool $expectedIsEnd, + ScannerInterface $actualScanner, + ): void { + BufferTest::assertBufferState( + expectedStart: $expectedBufferStart, + expectedEnd: $expectedBufferEnd, + expectedContents: $expectedBufferContents, + actualBuffer: $actualScanner->getBuffer(), + message: 'Buffer of scanner was incorrect' + ); + + self::assertEquals( + $expectedIsEnd, + $actualScanner->isEnd(), + $expectedIsEnd + ? 'Scanner continues unexpectedly.' + : 'Scanner ended unexpectedly.' 
+ ); + } +} diff --git a/test/Unit/Language/Parser/Module/ModuleParserTest.php b/test/Unit/Language/Parser/Module/ModuleParserTest.php index 0763d0c..d3b6ce0 100644 --- a/test/Unit/Language/Parser/Module/ModuleParserTest.php +++ b/test/Unit/Language/Parser/Module/ModuleParserTest.php @@ -36,6 +36,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\StructDeclaration\StructNameNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; +use PackageFactory\ComponentEngine\Language\Lexer\Scanner\ScannerException; use PackageFactory\ComponentEngine\Language\Parser\Module\ModuleCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\Module\ModuleParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; @@ -301,9 +302,11 @@ function () { $moduleParser->parse($lexer); }, ModuleCouldNotBeParsed::becauseOfLexerException( - cause: LexerException::becauseOfUnexpectedExceedingSource( - affectedRangeInSource: $this->range([4, 0], [4, 0]), - exceedingCharacter: 'e' + cause: LexerException::becauseOfScannerException( + cause: ScannerException::becauseOfUnexpectedExceedingSource( + affectedRangeInSource: $this->range([4, 0], [4, 0]), + exceedingCharacter: 'e' + ) ) ) ); From 5552c70a6bf9368fb4199c391d6a368f3ee0a407 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Thu, 17 Aug 2023 16:21:20 +0200 Subject: [PATCH 09/19] TASK: Replace remaining references to "TokenType" --- src/Language/Lexer/Lexer.php | 12 ++--- src/Language/Lexer/Matcher/Matcher.php | 4 +- .../BooleanLiteral/BooleanLiteralParser.php | 6 +-- .../ComponentDeclarationParser.php | 6 +-- .../EnumDeclaration/EnumDeclarationParser.php | 6 +-- src/Language/Parser/Export/ExportParser.php | 6 +-- .../Parser/Expression/ExpressionParser.php | 32 +++++------ src/Language/Parser/Expression/Precedence.php | 8 +-- src/Language/Parser/Import/ImportParser.php | 4 +- .../IntegerLiteral/IntegerLiteralParser.php | 6 +-- 
src/Language/Parser/Tag/TagParser.php | 6 +-- src/Language/Parser/Text/TextParser.php | 14 ++--- src/Language/Util/DebugHelper.php | 16 +++--- test/Unit/Language/Lexer/LexerTest.php | 54 +++++++++---------- 14 files changed, 90 insertions(+), 90 deletions(-) diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index bdc5d7d..902dced 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -32,19 +32,19 @@ final class Lexer { - private static Rules $TOKEN_TYPES_SPACE; - private static Rules $TOKEN_TYPES_SPACE_AND_COMMENTS; + private static Rules $RULES_SPACE; + private static Rules $RULES_SPACE_AND_COMMENTS; private readonly Scanner $scanner; private ?Rule $ruleUnderCursor = null; public function __construct(string $source) { - self::$TOKEN_TYPES_SPACE = Rules::from( + self::$RULES_SPACE = Rules::from( Rule::SPACE, Rule::END_OF_LINE ); - self::$TOKEN_TYPES_SPACE_AND_COMMENTS = Rules::from( + self::$RULES_SPACE_AND_COMMENTS = Rules::from( Rule::SPACE, Rule::END_OF_LINE, Rule::COMMENT @@ -223,7 +223,7 @@ public function expectOneOf(Rules $rules): RuleInterface public function skipSpace(): void { - while ($this->scanner->scanOneOf(...self::$TOKEN_TYPES_SPACE->items)) { + while ($this->scanner->scanOneOf(...self::$RULES_SPACE->items)) { $this->scanner->commit(); } @@ -236,7 +236,7 @@ public function skipSpace(): void public function skipSpaceAndComments(): void { - while ($this->scanner->scanOneOf(...self::$TOKEN_TYPES_SPACE_AND_COMMENTS->items)) { + while ($this->scanner->scanOneOf(...self::$RULES_SPACE_AND_COMMENTS->items)) { $this->scanner->commit(); } diff --git a/src/Language/Lexer/Matcher/Matcher.php b/src/Language/Lexer/Matcher/Matcher.php index e6c66f5..f971da9 100644 --- a/src/Language/Lexer/Matcher/Matcher.php +++ b/src/Language/Lexer/Matcher/Matcher.php @@ -37,9 +37,9 @@ abstract class Matcher */ private static $instancesByRule = []; - final public static function for(Rule $tokenType): self + final public static 
function for(Rule $rule): self { - return self::$instancesByRule[$tokenType->value] ??= match ($tokenType) { + return self::$instancesByRule[$rule->value] ??= match ($rule) { Rule::COMMENT => new Sequence( new Exact('#'), diff --git a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php index bbf39d7..436b853 100644 --- a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php +++ b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php @@ -32,11 +32,11 @@ final class BooleanLiteralParser { use Singleton; - private static Rules $TOKEN_TYPES_BOOLEAN_KEYWORDS; + private static Rules $RULES_BOOLEAN_KEYWORDS; private function __construct() { - self::$TOKEN_TYPES_BOOLEAN_KEYWORDS ??= Rules::from( + self::$RULES_BOOLEAN_KEYWORDS ??= Rules::from( Rule::KEYWORD_TRUE, Rule::KEYWORD_FALSE ); @@ -44,7 +44,7 @@ private function __construct() public function parse(Lexer $lexer): BooleanLiteralNode { - $lexer->readOneOf(self::$TOKEN_TYPES_BOOLEAN_KEYWORDS); + $lexer->readOneOf(self::$RULES_BOOLEAN_KEYWORDS); return new BooleanLiteralNode( rangeInSource: $lexer->getCursorRange(), diff --git a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php index acbe828..1d21193 100644 --- a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php +++ b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php @@ -39,14 +39,14 @@ final class ComponentDeclarationParser { use Singleton; - private static Rules $TOKEN_TYPES_SPACE; + private static Rules $RULES_SPACE; private ?PropertyDeclarationParser $propertyDeclarationParser = null; private ?ExpressionParser $returnParser = null; private function __construct() { - self::$TOKEN_TYPES_SPACE ??= Rules::from( + self::$RULES_SPACE ??= Rules::from( Rule::SPACE, Rule::END_OF_LINE ); @@ -108,7 +108,7 @@ private function parseReturn(Lexer $lexer): 
ExpressionNode $this->returnParser ??= new ExpressionParser(); $lexer->read(Rule::KEYWORD_RETURN); - $lexer->readOneOf(self::$TOKEN_TYPES_SPACE); + $lexer->readOneOf(self::$RULES_SPACE); $lexer->skipSpaceAndComments(); return $this->returnParser->parse($lexer); diff --git a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php index 70750b9..0cd2406 100644 --- a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php +++ b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php @@ -44,14 +44,14 @@ final class EnumDeclarationParser { use Singleton; - private static Rules $TOKEN_TYPES_ENUM_MEMBER_VALUE_START; + private static Rules $RULES_ENUM_MEMBER_VALUE_START; private ?StringLiteralParser $stringLiteralParser = null; private ?IntegerLiteralParser $integerLiteralParser = null; private function __construct() { - self::$TOKEN_TYPES_ENUM_MEMBER_VALUE_START ??= Rules::from( + self::$RULES_ENUM_MEMBER_VALUE_START ??= Rules::from( Rule::STRING_LITERAL_DELIMITER, Rule::INTEGER_BINARY, Rule::INTEGER_OCTAL, @@ -138,7 +138,7 @@ private function parseEnumMemberValue(Lexer $lexer): ?EnumMemberValueNode if ($lexer->probe(Rule::BRACKET_ROUND_OPEN)) { $start = $lexer->getStartPosition(); - $value = match ($lexer->expectOneOf(self::$TOKEN_TYPES_ENUM_MEMBER_VALUE_START)) { + $value = match ($lexer->expectOneOf(self::$RULES_ENUM_MEMBER_VALUE_START)) { Rule::STRING_LITERAL_DELIMITER => $this->parseStringLiteral($lexer), default => diff --git a/src/Language/Parser/Export/ExportParser.php b/src/Language/Parser/Export/ExportParser.php index 645349a..9ba5662 100644 --- a/src/Language/Parser/Export/ExportParser.php +++ b/src/Language/Parser/Export/ExportParser.php @@ -41,7 +41,7 @@ final class ExportParser { use Singleton; - private static Rules $TOKEN_TYPES_DECLARATION_KEYWORDS; + private static Rules $RULES_DECLARATION_KEYWORDS; private ?ComponentDeclarationParser $componentDeclarationParser = null; 
private ?EnumDeclarationParser $enumDeclarationParser = null; @@ -49,7 +49,7 @@ final class ExportParser private function __construct() { - self::$TOKEN_TYPES_DECLARATION_KEYWORDS ??= Rules::from( + self::$RULES_DECLARATION_KEYWORDS ??= Rules::from( Rule::KEYWORD_COMPONENT, Rule::KEYWORD_ENUM, Rule::KEYWORD_STRUCT @@ -64,7 +64,7 @@ public function parse(Lexer $lexer): ExportNode $lexer->skipSpace(); - $declaration = match ($lexer->expectOneOf(self::$TOKEN_TYPES_DECLARATION_KEYWORDS)) { + $declaration = match ($lexer->expectOneOf(self::$RULES_DECLARATION_KEYWORDS)) { Rule::KEYWORD_COMPONENT => $this->parseComponentDeclaration($lexer), Rule::KEYWORD_ENUM => $this->parseEnumDeclaration($lexer), Rule::KEYWORD_STRUCT => $this->parseStructDeclaration($lexer), diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index 3d239fe..676f3cf 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -48,10 +48,10 @@ final class ExpressionParser { - private static Rules $TOKEN_TYPES_ACCESS; - private static Rules $TOKEN_TYPES_BINARY_OPERATORS; - private static Rules $TOKEN_TYPES_UNARY; - private static Rules $TOKEN_TYPES_CLOSING_DELIMITERS; + private static Rules $RULES_ACCESS; + private static Rules $RULES_BINARY_OPERATORS; + private static Rules $RULES_UNARY; + private static Rules $RULES_CLOSING_DELIMITERS; private ?BooleanLiteralParser $booleanLiteralParser = null; private ?IntegerLiteralParser $integerLiteralParser = null; @@ -65,11 +65,11 @@ final class ExpressionParser public function __construct( private Precedence $precedence = Precedence::SEQUENCE ) { - self::$TOKEN_TYPES_ACCESS ??= Rules::from( + self::$RULES_ACCESS ??= Rules::from( Rule::SYMBOL_PERIOD, Rule::SYMBOL_OPTCHAIN ); - self::$TOKEN_TYPES_BINARY_OPERATORS ??= Rules::from( + self::$RULES_BINARY_OPERATORS ??= Rules::from( Rule::SYMBOL_NULLISH_COALESCE, Rule::SYMBOL_BOOLEAN_AND, 
Rule::SYMBOL_BOOLEAN_OR, @@ -78,7 +78,7 @@ public function __construct( Rule::SYMBOL_GREATER_THAN, Rule::SYMBOL_LESS_THAN ); - self::$TOKEN_TYPES_UNARY ??= Rules::from( + self::$RULES_UNARY ??= Rules::from( Rule::SYMBOL_EXCLAMATIONMARK, Rule::KEYWORD_TRUE, Rule::KEYWORD_FALSE, @@ -93,7 +93,7 @@ public function __construct( Rule::BRACKET_ANGLE_OPEN, Rule::BRACKET_ROUND_OPEN ); - self::$TOKEN_TYPES_CLOSING_DELIMITERS = Rules::from( + self::$RULES_CLOSING_DELIMITERS = Rules::from( Rule::BRACKET_CURLY_OPEN, Rule::BRACKET_CURLY_CLOSE, Rule::BRACKET_ROUND_CLOSE, @@ -110,11 +110,11 @@ public function parse(Lexer $lexer): ExpressionNode while (!$lexer->isEnd()) { $lexer->skipSpaceAndComments(); - if ($lexer->peekOneOf(self::$TOKEN_TYPES_CLOSING_DELIMITERS)) { + if ($lexer->peekOneOf(self::$RULES_CLOSING_DELIMITERS)) { return $result; } - if ($lexer->probeOneOf(self::$TOKEN_TYPES_ACCESS)) { + if ($lexer->probeOneOf(self::$RULES_ACCESS)) { $result = $this->parseAcccess($lexer, $result); continue; } @@ -128,9 +128,9 @@ public function parse(Lexer $lexer): ExpressionNode continue; } - if ($tokenType = $lexer->peekOneOf(self::$TOKEN_TYPES_BINARY_OPERATORS)) { - assert($tokenType instanceof Rule); - if ($this->precedence->mustStopAt($tokenType)) { + if ($rule = $lexer->peekOneOf(self::$RULES_BINARY_OPERATORS)) { + assert($rule instanceof Rule); + if ($this->precedence->mustStopAt($rule)) { return $result; } @@ -149,7 +149,7 @@ private function parseUnaryStatement(Lexer $lexer): ExpressionNode if ($lexer->peek(Rule::TEMPLATE_LITERAL_DELIMITER)) { $result = $this->parseTemplateLiteral($lexer); } else { - $result = match ($lexer->expectOneOf(self::$TOKEN_TYPES_UNARY)) { + $result = match ($lexer->expectOneOf(self::$RULES_UNARY)) { Rule::SYMBOL_EXCLAMATIONMARK => $this->parseUnaryOperation($lexer), Rule::KEYWORD_TRUE, @@ -356,7 +356,7 @@ private function parseAcccess(Lexer $lexer, ExpressionNode $parent): ExpressionN $lexer->skipSpaceAndComments(); - if 
(!$lexer->probeOneOf(self::$TOKEN_TYPES_ACCESS)) { + if (!$lexer->probeOneOf(self::$RULES_ACCESS)) { break; } } @@ -407,7 +407,7 @@ private function parseBinaryOperator(Lexer $lexer): BinaryOperator return BinaryOperator::LESS_THAN_OR_EQUAL; } - $lexer->readOneOf(self::$TOKEN_TYPES_BINARY_OPERATORS); + $lexer->readOneOf(self::$RULES_BINARY_OPERATORS); $operator = match ($lexer->getRuleUnderCursor()) { Rule::SYMBOL_NULLISH_COALESCE => BinaryOperator::NULLISH_COALESCE, Rule::SYMBOL_BOOLEAN_AND => BinaryOperator::AND, diff --git a/src/Language/Parser/Expression/Precedence.php b/src/Language/Parser/Expression/Precedence.php index b13a217..f297c7f 100644 --- a/src/Language/Parser/Expression/Precedence.php +++ b/src/Language/Parser/Expression/Precedence.php @@ -40,9 +40,9 @@ enum Precedence: int case TERNARY = 3; case SEQUENCE = 1; - public static function forRule(Rule $tokenType): self + public static function forRule(Rule $rule): self { - return match ($tokenType) { + return match ($rule) { Rule::BRACKET_ROUND_OPEN, Rule::BRACKET_ROUND_CLOSE, Rule::BRACKET_SQUARE_OPEN, @@ -90,8 +90,8 @@ public static function forBinaryOperator(BinaryOperator $binaryOperator): self }; } - public function mustStopAt(Rule $tokenType): bool + public function mustStopAt(Rule $rule): bool { - return self::forRule($tokenType)->value <= $this->value; + return self::forRule($rule)->value <= $this->value; } } diff --git a/src/Language/Parser/Import/ImportParser.php b/src/Language/Parser/Import/ImportParser.php index be28219..b42d888 100644 --- a/src/Language/Parser/Import/ImportParser.php +++ b/src/Language/Parser/Import/ImportParser.php @@ -41,13 +41,13 @@ final class ImportParser { use Singleton; - private static Rules $TOKEN_TYPES_NAME_BOUNDARIES; + private static Rules $RULES_NAME_BOUNDARIES; private ?StringLiteralParser $pathParser = null; private function __construct() { - self::$TOKEN_TYPES_NAME_BOUNDARIES ??= Rules::from( + self::$RULES_NAME_BOUNDARIES ??= Rules::from( Rule::WORD, 
Rule::SYMBOL_COMMA, Rule::BRACKET_CURLY_CLOSE diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index 5cb1dd2..b37bfe0 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -64,9 +64,9 @@ public function parse(Lexer $lexer): IntegerLiteralNode } } - private function getIntegerFormatFromToken(Rule $tokenType): IntegerFormat + private function getIntegerFormatFromToken(Rule $rule): IntegerFormat { - return match ($tokenType) { + return match ($rule) { Rule::INTEGER_BINARY => IntegerFormat::BINARY, Rule::INTEGER_OCTAL => IntegerFormat::OCTAL, Rule::INTEGER_DECIMAL => IntegerFormat::DECIMAL, @@ -74,7 +74,7 @@ private function getIntegerFormatFromToken(Rule $tokenType): IntegerFormat default => throw new LogicException( sprintf( 'Expected %s to be one of %s', - $tokenType->value, + $rule->value, DebugHelper::describeRules($this->INTEGER_TOKEN_TYPES) ) ) diff --git a/src/Language/Parser/Tag/TagParser.php b/src/Language/Parser/Tag/TagParser.php index 002e696..9df85d0 100644 --- a/src/Language/Parser/Tag/TagParser.php +++ b/src/Language/Parser/Tag/TagParser.php @@ -47,7 +47,7 @@ final class TagParser { use Singleton; - private static Rules $TOKEN_TYPES_ATTRIBUTE_DELIMITERS; + private static Rules $RULES_ATTRIBUTE_DELIMITERS; private ?StringLiteralParser $stringLiteralParser = null; private ?TextParser $textParser = null; @@ -55,7 +55,7 @@ final class TagParser private function __construct() { - self::$TOKEN_TYPES_ATTRIBUTE_DELIMITERS ??= Rules::from( + self::$RULES_ATTRIBUTE_DELIMITERS ??= Rules::from( Rule::STRING_LITERAL_DELIMITER, Rule::BRACKET_CURLY_OPEN ); @@ -153,7 +153,7 @@ private function parseAttributeName(Lexer $lexer): AttributeNameNode private function parseAttributeValue(Lexer $lexer): null|StringLiteralNode|ExpressionNode { if ($lexer->probe(Rule::SYMBOL_EQUALS)) { - return match 
($lexer->expectOneOf(self::$TOKEN_TYPES_ATTRIBUTE_DELIMITERS)) { + return match ($lexer->expectOneOf(self::$RULES_ATTRIBUTE_DELIMITERS)) { Rule::STRING_LITERAL_DELIMITER => $this->parseString($lexer), Rule::BRACKET_CURLY_OPEN => diff --git a/src/Language/Parser/Text/TextParser.php b/src/Language/Parser/Text/TextParser.php index 6f5a896..972b783 100644 --- a/src/Language/Parser/Text/TextParser.php +++ b/src/Language/Parser/Text/TextParser.php @@ -34,17 +34,17 @@ final class TextParser { use Singleton; - private static Rules $TOKEN_TYPES_END_DELIMITERS; - private static Rules $TOKEN_TYPES_CONTENT; + private static Rules $RULES_END_DELIMITERS; + private static Rules $RULES_CONTENT; private function __construct() { - self::$TOKEN_TYPES_END_DELIMITERS = Rules::from( + self::$RULES_END_DELIMITERS = Rules::from( Rule::SYMBOL_CLOSE_TAG, Rule::BRACKET_ANGLE_OPEN, Rule::BRACKET_CURLY_OPEN ); - self::$TOKEN_TYPES_CONTENT = Rules::from( + self::$RULES_CONTENT = Rules::from( Rule::SPACE, Rule::END_OF_LINE, Rule::TEXT @@ -68,15 +68,15 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo } $lexer->skipSpace(); - if ($lexer->isEnd() || $lexer->peekOneOf(self::$TOKEN_TYPES_END_DELIMITERS)) { + if ($lexer->isEnd() || $lexer->peekOneOf(self::$RULES_END_DELIMITERS)) { return null; } $hasTrailingSpace = false; $trailingSpaceContainsLineBreaks = false; $value = $hasLeadingSpace && $preserveLeadingSpace ? 
' ' : ''; - while (!$lexer->isEnd() && !$lexer->peekOneOf(self::$TOKEN_TYPES_END_DELIMITERS)) { - $lexer->readOneOf(self::$TOKEN_TYPES_CONTENT); + while (!$lexer->isEnd() && !$lexer->peekOneOf(self::$RULES_END_DELIMITERS)) { + $lexer->readOneOf(self::$RULES_CONTENT); if ($lexer->getRuleUnderCursor() === Rule::TEXT) { $start ??= $lexer->getStartPosition(); diff --git a/src/Language/Util/DebugHelper.php b/src/Language/Util/DebugHelper.php index 46b8caa..2c6276b 100644 --- a/src/Language/Util/DebugHelper.php +++ b/src/Language/Util/DebugHelper.php @@ -38,9 +38,9 @@ final class DebugHelper { - public static function describeRule(Rule $tokenType): string + public static function describeRule(Rule $rule): string { - return $tokenType->value . match ($tokenType) { + return $rule->value . match ($rule) { Rule::COMMENT => ' (e.g. "# ...")', Rule::KEYWORD_FROM => ' ("from")', @@ -111,17 +111,17 @@ public static function describeRule(Rule $tokenType): string }; } - public static function describeRules(Rules $tokenTypes): string + public static function describeRules(Rules $rules): string { - if (count($tokenTypes->items) === 1) { - return self::describeRule($tokenTypes->items[0]); + if (count($rules->items) === 1) { + return self::describeRule($rules->items[0]); } - $leadingItems = array_slice($tokenTypes->items, 0, -1); - $trailingItem = array_slice($tokenTypes->items, -1)[0]; + $leadingItems = array_slice($rules->items, 0, -1); + $trailingItem = array_slice($rules->items, -1)[0]; return join(', ', array_map( - static fn (Rule $tokenType) => self::describeRule($tokenType), + static fn (Rule $rule) => self::describeRule($rule), $leadingItems )) . ' or ' . 
self::describeRule($trailingItem); } diff --git a/test/Unit/Language/Lexer/LexerTest.php b/test/Unit/Language/Lexer/LexerTest.php index 51f2587..f22dc09 100644 --- a/test/Unit/Language/Lexer/LexerTest.php +++ b/test/Unit/Language/Lexer/LexerTest.php @@ -37,7 +37,7 @@ final class LexerTest extends TestCase protected function assertLexerState( Position $startPosition, Position $endPosition, - Rule $tokenTypeUnderCursor, + Rule $ruleUnderCursor, string $buffer, bool $isEnd ): void { @@ -54,7 +54,7 @@ protected function assertLexerState( ); $this->assertEquals( - $tokenTypeUnderCursor, + $ruleUnderCursor, $this->lexer->getRuleUnderCursor(), 'Failed asserting that token type under cursor of lexer equals' ); @@ -253,7 +253,7 @@ public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rule $ex $this->assertLexerState( startPosition: Position::from(0, 0), endPosition: Position::from(0, \mb_strlen($source) - 1), - tokenTypeUnderCursor: $expectedRule, + ruleUnderCursor: $expectedRule, buffer: $source, isEnd: true ); @@ -274,7 +274,7 @@ public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rul $this->assertLexerState( startPosition: Position::from(0, 0), endPosition: Position::from(0, \mb_strlen($source) - 1), - tokenTypeUnderCursor: $expectedRule, + ruleUnderCursor: $expectedRule, buffer: $source, isEnd: true ); @@ -507,18 +507,18 @@ public static function multipleTokensExamples(): iterable */ public function testReadOneOfWithMultipleRules( string $source, - Rules $tokenTypes, + Rules $rules, array ...$expectedLexerStates ): void { $this->lexer = new Lexer($source); foreach ($expectedLexerStates as $i => $expectedLexerState) { - $this->lexer->readOneOf($tokenTypes); + $this->lexer->readOneOf($rules); $this->assertLexerState( startPosition: Position::from(...$expectedLexerState[0]), endPosition: Position::from(...$expectedLexerState[1]), - tokenTypeUnderCursor: $expectedLexerState[2], + ruleUnderCursor: $expectedLexerState[2], buffer: 
$expectedLexerState[3], isEnd: $i === count($expectedLexerStates) - 1 ); @@ -685,10 +685,10 @@ public static function failingMultipleTokensExamples(): iterable { yield ($source = "# This is a comment\nThis is not a comment") => [ $source, - $tokenTypes = Rules::from(Rule::COMMENT, Rule::END_OF_LINE), + $rules = Rules::from(Rule::COMMENT, Rule::END_OF_LINE), 3, LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: $tokenTypes, + expectedRules: $rules, affectedRangeInSource: Range::from( Position::from(1, 0), Position::from(1, 0) @@ -702,23 +702,23 @@ public static function failingMultipleTokensExamples(): iterable * @dataProvider failingMultipleTokensExamples * @test * @param string $source - * @param Rules $tokenTypes + * @param Rules $rules * @param integer $numberOfReadOperations * @param LexerException $expectedLexerException * @return void */ public function throwsIfCharacterSequenceDoesNotMatchMultipleRules( string $source, - Rules $tokenTypes, + Rules $rules, int $numberOfReadOperations, LexerException $expectedLexerException ): void { $this->assertThrowsLexerException( - function () use ($source, $tokenTypes, $numberOfReadOperations) { + function () use ($source, $rules, $numberOfReadOperations) { $this->lexer = new Lexer($source); foreach(range(0, $numberOfReadOperations) as $i) { - $this->lexer->readOneOf($tokenTypes); + $this->lexer->readOneOf($rules); } }, $expectedLexerException @@ -767,14 +767,14 @@ public static function multipleRuleUnexpectedEndOfSourceExamples(): iterable { yield ($source = '') => [ $source, - $tokenTypes = Rules::from( + $rules = Rules::from( Rule::KEYWORD_RETURN, Rule::KEYWORD_NULL, Rule::SPACE ), 1, LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: $tokenTypes, + expectedRules: $rules, affectedRangeInSource: Range::from( Position::from(0, 0), Position::from(0, 0) @@ -784,14 +784,14 @@ public static function multipleRuleUnexpectedEndOfSourceExamples(): iterable yield ($source = 'return') => [ 
$source, - $tokenTypes = Rules::from( + $rules = Rules::from( Rule::KEYWORD_RETURN, Rule::KEYWORD_NULL, Rule::SPACE ), 2, LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: $tokenTypes, + expectedRules: $rules, affectedRangeInSource: Range::from( Position::from(0, 6), Position::from(0, 6) @@ -801,14 +801,14 @@ public static function multipleRuleUnexpectedEndOfSourceExamples(): iterable yield ($source = 'return ') => [ $source, - $tokenTypes = Rules::from( + $rules = Rules::from( Rule::KEYWORD_RETURN, Rule::KEYWORD_NULL, Rule::SPACE ), 3, LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: $tokenTypes, + expectedRules: $rules, affectedRangeInSource: Range::from( Position::from(0, 7), Position::from(0, 7) @@ -821,23 +821,23 @@ public static function multipleRuleUnexpectedEndOfSourceExamples(): iterable * @dataProvider multipleRuleUnexpectedEndOfSourceExamples * @test * @param string $source - * @param Rules $tokenTypes + * @param Rules $rules * @param integer $numberOfReadOperations * @param LexerException $expectedLexerException * @return void */ public function throwsIfSourceEndsUnexpectedlyWhileReadingMultipleRules( string $source, - Rules $tokenTypes, + Rules $rules, int $numberOfReadOperations, LexerException $expectedLexerException ): void { $this->assertThrowsLexerException( - function () use ($source, $tokenTypes, $numberOfReadOperations) { + function () use ($source, $rules, $numberOfReadOperations) { $this->lexer = new Lexer($source); foreach(range(0, $numberOfReadOperations) as $i) { - $this->lexer->readOneOf($tokenTypes); + $this->lexer->readOneOf($rules); } }, $expectedLexerException @@ -859,7 +859,7 @@ public function skipsSpace(): void $this->assertLexerState( startPosition: Position::from(1, 4), endPosition: Position::from(1, 5), - tokenTypeUnderCursor: Rule::INTEGER_DECIMAL, + ruleUnderCursor: Rule::INTEGER_DECIMAL, buffer: '42', isEnd: true ); @@ -874,7 +874,7 @@ public function skipsSpace(): void $this->assertLexerState( 
startPosition: Position::from(1, 4), endPosition: Position::from(1, 5), - tokenTypeUnderCursor: Rule::INTEGER_DECIMAL, + ruleUnderCursor: Rule::INTEGER_DECIMAL, buffer: '42', isEnd: true ); @@ -907,7 +907,7 @@ public function skipsSpaceAndComments(): void $this->assertLexerState( startPosition: Position::from(6, 4), endPosition: Position::from(6, 12), - tokenTypeUnderCursor: Rule::KEYWORD_COMPONENT, + ruleUnderCursor: Rule::KEYWORD_COMPONENT, buffer: 'component', isEnd: true ); @@ -941,7 +941,7 @@ public function skipsSpaceAndComments(): void $this->assertLexerState( startPosition: Position::from(6, 4), endPosition: Position::from(6, 12), - tokenTypeUnderCursor: Rule::KEYWORD_COMPONENT, + ruleUnderCursor: Rule::KEYWORD_COMPONENT, buffer: 'component', isEnd: true ); From 2ef898929df44560fd8cf2709399e3030dc7a15d Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Thu, 17 Aug 2023 17:01:48 +0200 Subject: [PATCH 10/19] TASK: Remove method Lexer::getRuleUnderCursor and replace call-sites --- src/Language/Lexer/Lexer.php | 28 ++++++++----------- .../BooleanLiteral/BooleanLiteralParser.php | 4 +-- .../Parser/Expression/ExpressionParser.php | 23 ++++++--------- .../IntegerLiteral/IntegerLiteralParser.php | 4 +-- src/Language/Parser/Text/TextParser.php | 6 ++-- test/Unit/Language/Lexer/LexerTest.php | 14 ---------- 6 files changed, 26 insertions(+), 53 deletions(-) diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index 902dced..2ff7962 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -36,7 +36,6 @@ final class Lexer private static Rules $RULES_SPACE_AND_COMMENTS; private readonly Scanner $scanner; - private ?Rule $ruleUnderCursor = null; public function __construct(string $source) { @@ -53,13 +52,6 @@ public function __construct(string $source) $this->scanner = new Scanner($source); } - public function getRuleUnderCursor(): Rule - { - assert($this->ruleUnderCursor !== null); - - return $this->ruleUnderCursor; - } - 
public function getBuffer(): string { return $this->scanner->getBuffer()->getContents(); @@ -98,7 +90,6 @@ public function read(Rule $rule): void { if ($this->scanner->scan($rule)) { $this->scanner->commit(); - $this->ruleUnderCursor = $rule; return; } @@ -116,13 +107,13 @@ public function read(Rule $rule): void ); } - public function readOneOf(Rules $rules): void + /** @phpstan-impure */ + public function readOneOf(Rules $rules): Rule { if ($rule = $this->scanner->scanOneOf(...$rules->items)) { $this->scanner->commit(); assert($rule instanceof Rule); - $this->ruleUnderCursor = $rule; - return; + return $rule; } if ($this->scanner->isEnd()) { @@ -143,7 +134,6 @@ public function probe(Rule $rule): bool { if ($this->scanner->scan($rule)) { $this->scanner->commit(); - $this->ruleUnderCursor = $rule; return true; } @@ -151,12 +141,12 @@ public function probe(Rule $rule): bool return false; } - public function probeOneOf(Rules $rules): ?RuleInterface + /** @phpstan-impure */ + public function probeOneOf(Rules $rules): ?Rule { if ($rule = $this->scanner->scanOneOf(...$rules->items)) { $this->scanner->commit(); assert($rule instanceof Rule); - $this->ruleUnderCursor = $rule; return $rule; } @@ -172,11 +162,13 @@ public function peek(Rule $rule): bool return $result; } - public function peekOneOf(Rules $rules): ?RuleInterface + /** @phpstan-impure */ + public function peekOneOf(Rules $rules): ?Rule { $rule = $this->scanner->scanOneOf(...$rules->items); $this->scanner->dismiss(); + assert($rule === null || $rule instanceof Rule); return $rule; } @@ -200,7 +192,8 @@ public function expect(Rule $rule): void $this->scanner->dismiss(); } - public function expectOneOf(Rules $rules): RuleInterface + /** @phpstan-impure */ + public function expectOneOf(Rules $rules): Rule { if ($this->scanner->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( @@ -211,6 +204,7 @@ public function expectOneOf(Rules $rules): RuleInterface if ($rule = 
$this->scanner->scanOneOf(...$rules->items)) { $this->scanner->dismiss(); + assert($rule instanceof Rule); return $rule; } diff --git a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php index 436b853..6ea47ff 100644 --- a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php +++ b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php @@ -44,11 +44,11 @@ private function __construct() public function parse(Lexer $lexer): BooleanLiteralNode { - $lexer->readOneOf(self::$RULES_BOOLEAN_KEYWORDS); + $rule = $lexer->readOneOf(self::$RULES_BOOLEAN_KEYWORDS); return new BooleanLiteralNode( rangeInSource: $lexer->getCursorRange(), - value: $lexer->getRuleUnderCursor() === Rule::KEYWORD_TRUE + value: $rule === Rule::KEYWORD_TRUE ); } } diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index 676f3cf..cc1ff7e 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -114,8 +114,8 @@ public function parse(Lexer $lexer): ExpressionNode return $result; } - if ($lexer->probeOneOf(self::$RULES_ACCESS)) { - $result = $this->parseAcccess($lexer, $result); + if ($lexer->peekOneOf(self::$RULES_ACCESS)) { + $result = $this->parseAccess($lexer, $result); continue; } @@ -331,11 +331,9 @@ private function parseBracketedExpression(Lexer $lexer): ExpressionNode ); } - private function parseAcccess(Lexer $lexer, ExpressionNode $parent): ExpressionNode + private function parseAccess(Lexer $lexer, ExpressionNode $parent): ExpressionNode { - while (!$lexer->isEnd()) { - $type = $this->parseAccessType($lexer); - + while ($type = $this->parseAccessType($lexer)) { $lexer->read(Rule::WORD); $accessNode = new AccessNode( rangeInSource: $parent->rangeInSource->start->toRange( @@ -355,21 +353,17 @@ private function parseAcccess(Lexer $lexer, ExpressionNode $parent): ExpressionN ); 
$lexer->skipSpaceAndComments(); - - if (!$lexer->probeOneOf(self::$RULES_ACCESS)) { - break; - } } return $parent; } - private function parseAccessType(Lexer $lexer): AccessType + private function parseAccessType(Lexer $lexer): ?AccessType { - return match ($lexer->getRuleUnderCursor()) { + return match ($lexer->probeOneOf(self::$RULES_ACCESS)) { Rule::SYMBOL_PERIOD => AccessType::MANDATORY, Rule::SYMBOL_OPTCHAIN => AccessType::OPTIONAL, - default => throw new LogicException($lexer->getRuleUnderCursor()->name) + default => null }; } @@ -407,8 +401,7 @@ private function parseBinaryOperator(Lexer $lexer): BinaryOperator return BinaryOperator::LESS_THAN_OR_EQUAL; } - $lexer->readOneOf(self::$RULES_BINARY_OPERATORS); - $operator = match ($lexer->getRuleUnderCursor()) { + $operator = match ($lexer->readOneOf(self::$RULES_BINARY_OPERATORS)) { Rule::SYMBOL_NULLISH_COALESCE => BinaryOperator::NULLISH_COALESCE, Rule::SYMBOL_BOOLEAN_AND => BinaryOperator::AND, Rule::SYMBOL_BOOLEAN_OR => BinaryOperator::OR, diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index b37bfe0..89290f5 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -52,11 +52,11 @@ private function __construct() public function parse(Lexer $lexer): IntegerLiteralNode { try { - $lexer->readOneOf(self::$INTEGER_TOKEN_TYPES); + $rule = $lexer->readOneOf(self::$INTEGER_TOKEN_TYPES); return new IntegerLiteralNode( rangeInSource: $lexer->getCursorRange(), - format: $this->getIntegerFormatFromToken($lexer->getRuleUnderCursor()), + format: $this->getIntegerFormatFromToken($rule), value: $lexer->getBuffer() ); } catch (LexerException $e) { diff --git a/src/Language/Parser/Text/TextParser.php b/src/Language/Parser/Text/TextParser.php index 972b783..4619515 100644 --- a/src/Language/Parser/Text/TextParser.php +++ 
b/src/Language/Parser/Text/TextParser.php @@ -76,9 +76,9 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo $trailingSpaceContainsLineBreaks = false; $value = $hasLeadingSpace && $preserveLeadingSpace ? ' ' : ''; while (!$lexer->isEnd() && !$lexer->peekOneOf(self::$RULES_END_DELIMITERS)) { - $lexer->readOneOf(self::$RULES_CONTENT); + $rule = $lexer->readOneOf(self::$RULES_CONTENT); - if ($lexer->getRuleUnderCursor() === Rule::TEXT) { + if ($rule === Rule::TEXT) { $start ??= $lexer->getStartPosition(); if ($hasTrailingSpace) { $value .= ' '; @@ -89,7 +89,7 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo continue; } - if ($lexer->getRuleUnderCursor() === Rule::END_OF_LINE) { + if ($rule === Rule::END_OF_LINE) { $trailingSpaceContainsLineBreaks = true; } diff --git a/test/Unit/Language/Lexer/LexerTest.php b/test/Unit/Language/Lexer/LexerTest.php index f22dc09..2f79405 100644 --- a/test/Unit/Language/Lexer/LexerTest.php +++ b/test/Unit/Language/Lexer/LexerTest.php @@ -37,7 +37,6 @@ final class LexerTest extends TestCase protected function assertLexerState( Position $startPosition, Position $endPosition, - Rule $ruleUnderCursor, string $buffer, bool $isEnd ): void { @@ -53,12 +52,6 @@ protected function assertLexerState( 'Failed asserting that end position of lexer equals' ); - $this->assertEquals( - $ruleUnderCursor, - $this->lexer->getRuleUnderCursor(), - 'Failed asserting that token type under cursor of lexer equals' - ); - $this->assertEquals( $buffer, $this->lexer->getBuffer(), @@ -253,7 +246,6 @@ public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rule $ex $this->assertLexerState( startPosition: Position::from(0, 0), endPosition: Position::from(0, \mb_strlen($source) - 1), - ruleUnderCursor: $expectedRule, buffer: $source, isEnd: true ); @@ -274,7 +266,6 @@ public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rul $this->assertLexerState( startPosition: 
Position::from(0, 0), endPosition: Position::from(0, \mb_strlen($source) - 1), - ruleUnderCursor: $expectedRule, buffer: $source, isEnd: true ); @@ -518,7 +509,6 @@ public function testReadOneOfWithMultipleRules( $this->assertLexerState( startPosition: Position::from(...$expectedLexerState[0]), endPosition: Position::from(...$expectedLexerState[1]), - ruleUnderCursor: $expectedLexerState[2], buffer: $expectedLexerState[3], isEnd: $i === count($expectedLexerStates) - 1 ); @@ -859,7 +849,6 @@ public function skipsSpace(): void $this->assertLexerState( startPosition: Position::from(1, 4), endPosition: Position::from(1, 5), - ruleUnderCursor: Rule::INTEGER_DECIMAL, buffer: '42', isEnd: true ); @@ -874,7 +863,6 @@ public function skipsSpace(): void $this->assertLexerState( startPosition: Position::from(1, 4), endPosition: Position::from(1, 5), - ruleUnderCursor: Rule::INTEGER_DECIMAL, buffer: '42', isEnd: true ); @@ -907,7 +895,6 @@ public function skipsSpaceAndComments(): void $this->assertLexerState( startPosition: Position::from(6, 4), endPosition: Position::from(6, 12), - ruleUnderCursor: Rule::KEYWORD_COMPONENT, buffer: 'component', isEnd: true ); @@ -941,7 +928,6 @@ public function skipsSpaceAndComments(): void $this->assertLexerState( startPosition: Position::from(6, 4), endPosition: Position::from(6, 12), - ruleUnderCursor: Rule::KEYWORD_COMPONENT, buffer: 'component', isEnd: true ); From 0d35b1f443a9a47c6afde72e60a6c43cc0e96021 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Thu, 17 Aug 2023 17:10:30 +0200 Subject: [PATCH 11/19] TASK: Streamline Lexer interface by exposing buffer object directly --- src/Language/Lexer/Lexer.php | 24 ++++--------------- .../BooleanLiteral/BooleanLiteralParser.php | 2 +- .../ComponentDeclarationParser.php | 8 +++---- .../EnumDeclaration/EnumDeclarationParser.php | 16 ++++++------- src/Language/Parser/Export/ExportParser.php | 4 ++-- .../Parser/Expression/ExpressionParser.php | 12 +++++----- 
src/Language/Parser/Import/ImportParser.php | 16 ++++++------- .../IntegerLiteral/IntegerLiteralParser.php | 4 ++-- src/Language/Parser/Match/MatchParser.php | 10 ++++---- .../Parser/NullLiteral/NullLiteralParser.php | 2 +- .../PropertyDeclarationParser.php | 4 ++-- .../StringLiteral/StringLiteralParser.php | 8 +++---- .../StructDeclarationParser.php | 8 +++---- src/Language/Parser/Tag/TagParser.php | 22 ++++++++--------- .../TemplateLiteral/TemplateLiteralParser.php | 14 +++++------ src/Language/Parser/Text/TextParser.php | 10 ++++---- .../TypeReference/TypeReferenceParser.php | 10 ++++---- .../ValueReference/ValueReferenceParser.php | 4 ++-- test/Unit/Language/Lexer/LexerTest.php | 6 ++--- 19 files changed, 84 insertions(+), 100 deletions(-) diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index 2ff7962..dbe9afe 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -22,6 +22,7 @@ namespace PackageFactory\ComponentEngine\Language\Lexer; +use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Language\Lexer\Rule\RuleInterface; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; @@ -37,6 +38,8 @@ final class Lexer private readonly Scanner $scanner; + public readonly Buffer $buffer; + public function __construct(string $source) { self::$RULES_SPACE = Rules::from( @@ -50,11 +53,7 @@ public function __construct(string $source) ); $this->scanner = new Scanner($source); - } - - public function getBuffer(): string - { - return $this->scanner->getBuffer()->getContents(); + $this->buffer = $this->scanner->getBuffer(); } public function isEnd(): bool @@ -71,21 +70,6 @@ public function assertIsEnd(): void } } - public function getStartPosition(): Position - { - return $this->scanner->getBuffer()->getStart(); - } - - public function getEndPosition(): Position - { - return $this->scanner->getBuffer()->getEnd(); - } 
- - public function getCursorRange(): Range - { - return $this->scanner->getBuffer()->getRange(); - } - public function read(Rule $rule): void { if ($this->scanner->scan($rule)) { diff --git a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php index 6ea47ff..1e3856d 100644 --- a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php +++ b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php @@ -47,7 +47,7 @@ public function parse(Lexer $lexer): BooleanLiteralNode $rule = $lexer->readOneOf(self::$RULES_BOOLEAN_KEYWORDS); return new BooleanLiteralNode( - rangeInSource: $lexer->getCursorRange(), + rangeInSource: $lexer->buffer->getRange(), value: $rule === Rule::KEYWORD_TRUE ); } diff --git a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php index 1d21193..f3ad5af 100644 --- a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php +++ b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php @@ -55,7 +55,7 @@ private function __construct() public function parse(Lexer $lexer): ComponentDeclarationNode { $lexer->read(Rule::KEYWORD_COMPONENT); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpace(); $name = $this->parseName($lexer); @@ -63,7 +63,7 @@ public function parse(Lexer $lexer): ComponentDeclarationNode $return = $this->parseReturn($lexer); $lexer->read(Rule::BRACKET_CURLY_CLOSE); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new ComponentDeclarationNode( rangeInSource: Range::from($start, $end), @@ -77,8 +77,8 @@ private function parseName(Lexer $lexer): ComponentNameNode { $lexer->read(Rule::WORD); $componentNameNode = new ComponentNameNode( - rangeInSource: $lexer->getCursorRange(), - value: ComponentName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + 
value: ComponentName::from($lexer->buffer->getContents()) ); $lexer->skipSpace(); diff --git a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php index 0cd2406..906dbc8 100644 --- a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php +++ b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php @@ -63,13 +63,13 @@ private function __construct() public function parse(Lexer $lexer): EnumDeclarationNode { $lexer->read(Rule::KEYWORD_ENUM); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpace(); $enumNameNode = $this->parseEnumName($lexer); $enumMemberDeclarations = $this->parseEnumMemberDeclarations($lexer); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new EnumDeclarationNode( rangeInSource: Range::from($start, $end), @@ -82,8 +82,8 @@ private function parseEnumName(Lexer $lexer): EnumNameNode { $lexer->read(Rule::WORD); $enumNameNode = new EnumNameNode( - rangeInSource: $lexer->getCursorRange(), - value: EnumName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + value: EnumName::from($lexer->buffer->getContents()) ); $lexer->skipSpace(); @@ -128,15 +128,15 @@ private function parseEnumMemberName(Lexer $lexer): EnumMemberNameNode $lexer->read(Rule::WORD); return new EnumMemberNameNode( - rangeInSource: $lexer->getCursorRange(), - value: EnumMemberName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + value: EnumMemberName::from($lexer->buffer->getContents()) ); } private function parseEnumMemberValue(Lexer $lexer): ?EnumMemberValueNode { if ($lexer->probe(Rule::BRACKET_ROUND_OPEN)) { - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $value = match ($lexer->expectOneOf(self::$RULES_ENUM_MEMBER_VALUE_START)) { Rule::STRING_LITERAL_DELIMITER => @@ -146,7 +146,7 @@ private function parseEnumMemberValue(Lexer $lexer): 
?EnumMemberValueNode }; $lexer->read(Rule::BRACKET_ROUND_CLOSE); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new EnumMemberValueNode( rangeInSource: Range::from($start, $end), diff --git a/src/Language/Parser/Export/ExportParser.php b/src/Language/Parser/Export/ExportParser.php index 9ba5662..b4dad11 100644 --- a/src/Language/Parser/Export/ExportParser.php +++ b/src/Language/Parser/Export/ExportParser.php @@ -60,7 +60,7 @@ public function parse(Lexer $lexer): ExportNode { try { $lexer->read(Rule::KEYWORD_EXPORT); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpace(); @@ -71,7 +71,7 @@ public function parse(Lexer $lexer): ExportNode default => throw new LogicException() }; - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new ExportNode( rangeInSource: Range::from($start, $end), diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index cc1ff7e..26c6f8c 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -184,7 +184,7 @@ private function parseUnaryStatement(Lexer $lexer): ExpressionNode private function parseUnaryOperation(Lexer $lexer): ExpressionNode { $operator = $this->parseUnaryOperator($lexer); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpaceAndComments(); $operand = $this->parseUnaryStatement($lexer); @@ -316,13 +316,13 @@ private function parseMatch(Lexer $lexer): ExpressionNode private function parseBracketedExpression(Lexer $lexer): ExpressionNode { $lexer->read(Rule::BRACKET_ROUND_OPEN); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpaceAndComments(); $innerExpressionNode = $this->parse($lexer); $lexer->read(Rule::BRACKET_ROUND_CLOSE); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); 
$lexer->skipSpaceAndComments(); return new ExpressionNode( @@ -337,13 +337,13 @@ private function parseAccess(Lexer $lexer, ExpressionNode $parent): ExpressionNo $lexer->read(Rule::WORD); $accessNode = new AccessNode( rangeInSource: $parent->rangeInSource->start->toRange( - $lexer->getEndPosition() + $lexer->buffer->getEnd() ), parent: $parent, type: $type, key: new AccessKeyNode( - rangeInSource: $lexer->getCursorRange(), - value: PropertyName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + value: PropertyName::from($lexer->buffer->getContents()) ) ); diff --git a/src/Language/Parser/Import/ImportParser.php b/src/Language/Parser/Import/ImportParser.php index b42d888..b5b6f87 100644 --- a/src/Language/Parser/Import/ImportParser.php +++ b/src/Language/Parser/Import/ImportParser.php @@ -58,7 +58,7 @@ public function parse(Lexer $lexer): ImportNode { try { $lexer->read(Rule::KEYWORD_FROM); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpace(); $path = $this->parsePath($lexer); @@ -67,7 +67,7 @@ public function parse(Lexer $lexer): ImportNode $lexer->skipSpace(); $names = $this->parseNames($lexer); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new ImportNode( rangeInSource: Range::from($start, $end), @@ -92,15 +92,15 @@ private function parsePath(Lexer $lexer): StringLiteralNode private function parseNames(Lexer $lexer): ImportedNameNodes { $lexer->read(Rule::BRACKET_CURLY_OPEN); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpaceAndComments(); $nameNodes = []; while (!$lexer->peek(Rule::BRACKET_CURLY_CLOSE)) { $lexer->read(Rule::WORD); $nameNodes[] = new ImportedNameNode( - rangeInSource: $lexer->getCursorRange(), - value: VariableName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + value: VariableName::from($lexer->buffer->getContents()) ); $lexer->skipSpaceAndComments(); @@ -112,7 +112,7 @@ private 
function parseNames(Lexer $lexer): ImportedNameNodes } $lexer->read(Rule::BRACKET_CURLY_CLOSE); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); try { return new ImportedNameNodes(...$nameNodes); @@ -129,8 +129,8 @@ public function parseName(Lexer $lexer): ImportedNameNode $lexer->read(Rule::WORD); return new ImportedNameNode( - rangeInSource: $lexer->getCursorRange(), - value: VariableName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + value: VariableName::from($lexer->buffer->getContents()) ); } } diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index 89290f5..9912a58 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -55,9 +55,9 @@ public function parse(Lexer $lexer): IntegerLiteralNode $rule = $lexer->readOneOf(self::$INTEGER_TOKEN_TYPES); return new IntegerLiteralNode( - rangeInSource: $lexer->getCursorRange(), + rangeInSource: $lexer->buffer->getRange(), format: $this->getIntegerFormatFromToken($rule), - value: $lexer->getBuffer() + value: $lexer->buffer->getContents() ); } catch (LexerException $e) { throw IntegerLiteralCouldNotBeParsed::becauseOfLexerException($e); diff --git a/src/Language/Parser/Match/MatchParser.php b/src/Language/Parser/Match/MatchParser.php index 8754d69..b111c71 100644 --- a/src/Language/Parser/Match/MatchParser.php +++ b/src/Language/Parser/Match/MatchParser.php @@ -45,12 +45,12 @@ final class MatchParser public function parse(Lexer $lexer): MatchNode { $lexer->read(Rule::KEYWORD_MATCH); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpace(); $subject = $this->parseSubject($lexer); $arms = $this->parseArms($lexer); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new MatchNode( rangeInSource: Range::from($start, $end), @@ -69,7 +69,7 @@ 
private function parseSubject(Lexer $lexer): ExpressionNode private function parseArms(Lexer $lexer): MatchArmNodes { $lexer->read(Rule::BRACKET_CURLY_OPEN); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $items = []; while (!$lexer->peek(Rule::BRACKET_CURLY_CLOSE)) { @@ -80,7 +80,7 @@ private function parseArms(Lexer $lexer): MatchArmNodes $lexer->skipSpaceAndComments(); $lexer->read(Rule::BRACKET_CURLY_CLOSE); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); try { return new MatchArmNodes(...$items); @@ -96,7 +96,7 @@ private function parseArm(Lexer $lexer): MatchArmNode { $left = $this->parseArmLeft($lexer); $start = $left?->items[0]?->rangeInSource->start ?? - $lexer->getStartPosition(); + $lexer->buffer->getStart(); $lexer->skipSpaceAndComments(); $lexer->read(Rule::SYMBOL_ARROW_SINGLE); diff --git a/src/Language/Parser/NullLiteral/NullLiteralParser.php b/src/Language/Parser/NullLiteral/NullLiteralParser.php index 5373cc4..fb6ae52 100644 --- a/src/Language/Parser/NullLiteral/NullLiteralParser.php +++ b/src/Language/Parser/NullLiteral/NullLiteralParser.php @@ -36,7 +36,7 @@ public function parse(Lexer $lexer): NullLiteralNode $lexer->read(Rule::KEYWORD_NULL); return new NullLiteralNode( - rangeInSource: $lexer->getCursorRange() + rangeInSource: $lexer->buffer->getRange() ); } } diff --git a/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php b/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php index 0cbed2e..ddcbc8b 100644 --- a/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php +++ b/src/Language/Parser/PropertyDeclaration/PropertyDeclarationParser.php @@ -62,8 +62,8 @@ public function parsePropertyName(Lexer $lexer): PropertyNameNode $lexer->read(Rule::WORD); return new PropertyNameNode( - rangeInSource: $lexer->getCursorRange(), - value: PropertyName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + value: 
PropertyName::from($lexer->buffer->getContents()) ); } } diff --git a/src/Language/Parser/StringLiteral/StringLiteralParser.php b/src/Language/Parser/StringLiteral/StringLiteralParser.php index 72d8998..0875144 100644 --- a/src/Language/Parser/StringLiteral/StringLiteralParser.php +++ b/src/Language/Parser/StringLiteral/StringLiteralParser.php @@ -35,23 +35,23 @@ final class StringLiteralParser public function parse(Lexer $lexer): StringLiteralNode { $lexer->read(Rule::STRING_LITERAL_DELIMITER); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $value = ''; while (!$lexer->peek(Rule::STRING_LITERAL_DELIMITER)) { if ($lexer->probe(Rule::STRING_LITERAL_CONTENT)) { - $value = $lexer->getBuffer(); + $value = $lexer->buffer->getContents(); } if ($lexer->probe(Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER)) { - $value = $lexer->getBuffer(); + $value = $lexer->buffer->getContents(); } break; } $lexer->read(Rule::STRING_LITERAL_DELIMITER); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new StringLiteralNode( rangeInSource: Range::from($start, $end), diff --git a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php index 319e0a6..83de98a 100644 --- a/src/Language/Parser/StructDeclaration/StructDeclarationParser.php +++ b/src/Language/Parser/StructDeclaration/StructDeclarationParser.php @@ -41,12 +41,12 @@ final class StructDeclarationParser public function parse(Lexer $lexer): StructDeclarationNode { $lexer->read(Rule::KEYWORD_STRUCT); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpace(); $structNameNode = $this->parseStructName($lexer); $propertyDeclarationNodes = $this->parsePropertyDeclarations($lexer); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new StructDeclarationNode( rangeInSource: Range::from($start, $end), @@ -59,8 +59,8 @@ private function 
parseStructName(Lexer $lexer): StructNameNode { $lexer->read(Rule::WORD); $structNameNode = new StructNameNode( - rangeInSource: $lexer->getCursorRange(), - value: StructName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + value: StructName::from($lexer->buffer->getContents()) ); $lexer->skipSpaceAndComments(); diff --git a/src/Language/Parser/Tag/TagParser.php b/src/Language/Parser/Tag/TagParser.php index 9df85d0..34aebff 100644 --- a/src/Language/Parser/Tag/TagParser.php +++ b/src/Language/Parser/Tag/TagParser.php @@ -64,14 +64,14 @@ private function __construct() public function parse(Lexer $lexer): TagNode { $lexer->read(Rule::BRACKET_ANGLE_OPEN); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $name = $this->parseName($lexer); $attributes = $this->parseAttributes($lexer); if ($lexer->probe(Rule::SYMBOL_SLASH_FORWARD)) { $lexer->read(Rule::BRACKET_ANGLE_CLOSE); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new TagNode( rangeInSource: Range::from($start, $end), @@ -86,7 +86,7 @@ public function parse(Lexer $lexer): TagNode $children = $this->parseChildren($lexer); $this->readClosingTagName($lexer, $name->value); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new TagNode( rangeInSource: Range::from($start, $end), @@ -102,10 +102,10 @@ private function parseName(Lexer $lexer): TagNameNode $lexer->read(Rule::WORD); $tagNameNode = new TagNameNode( rangeInSource: Range::from( - $lexer->getStartPosition(), - $lexer->getEndPosition() + $lexer->buffer->getStart(), + $lexer->buffer->getEnd() ), - value: TagName::from($lexer->getBuffer()) + value: TagName::from($lexer->buffer->getContents()) ); $lexer->skipSpace(); @@ -145,8 +145,8 @@ private function parseAttributeName(Lexer $lexer): AttributeNameNode $lexer->read(Rule::WORD); return new AttributeNameNode( - rangeInSource: $lexer->getCursorRange(), - value: AttributeName::from($lexer->getBuffer()) + 
rangeInSource: $lexer->buffer->getRange(), + value: AttributeName::from($lexer->buffer->getContents()) ); } @@ -219,13 +219,13 @@ private function parseText(Lexer $lexer, bool $preserveLeadingSpace): ?TextNode private function readClosingTagName(Lexer $lexer, TagName $expectedName): void { $lexer->read(Rule::SYMBOL_CLOSE_TAG); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->read(Rule::WORD); - $closingName = $lexer->getBuffer(); + $closingName = $lexer->buffer->getContents(); $lexer->read(Rule::BRACKET_ANGLE_CLOSE); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); if ($closingName !== $expectedName->value) { throw TagCouldNotBeParsed::becauseOfClosingTagNameMismatch( diff --git a/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php b/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php index eb4c7cf..5e3d9a4 100644 --- a/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php +++ b/src/Language/Parser/TemplateLiteral/TemplateLiteralParser.php @@ -43,16 +43,16 @@ final class TemplateLiteralParser public function parse(Lexer $lexer): TemplateLiteralNode { $lexer->read(Rule::TEMPLATE_LITERAL_DELIMITER); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lines = $this->parseLines($lexer); $lexer->read(Rule::TEMPLATE_LITERAL_DELIMITER); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new TemplateLiteralNode( rangeInSource: Range::from($start, $end), - indentation: $lexer->getStartPosition()->columnNumber, + indentation: $lexer->buffer->getStart()->columnNumber, lines: $lines ); } @@ -102,8 +102,8 @@ public function parseStringSegment(Lexer $lexer): TemplateLiteralStringSegmentNo $lexer->read(Rule::TEMPLATE_LITERAL_CONTENT); return new TemplateLiteralStringSegmentNode( - rangeInSource: $lexer->getCursorRange(), - value: $lexer->getBuffer() + rangeInSource: $lexer->buffer->getRange(), + value: $lexer->buffer->getContents() ); } @@ 
-112,14 +112,14 @@ public function parseExpressionSegment(Lexer $lexer): TemplateLiteralExpressionS $this->expressionParser ??= new ExpressionParser(); $lexer->read(Rule::BRACKET_CURLY_OPEN); - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $lexer->skipSpaceAndComments(); $expression = $this->expressionParser->parse($lexer); $lexer->skipSpaceAndComments(); $lexer->read(Rule::BRACKET_CURLY_CLOSE); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); return new TemplateLiteralExpressionSegmentNode( rangeInSource: Range::from($start, $end), diff --git a/src/Language/Parser/Text/TextParser.php b/src/Language/Parser/Text/TextParser.php index 4619515..5bf175c 100644 --- a/src/Language/Parser/Text/TextParser.php +++ b/src/Language/Parser/Text/TextParser.php @@ -58,12 +58,12 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo $hasLeadingSpace = false; if ($lexer->probe(Rule::SPACE)) { - $start = $lexer->getStartPosition(); + $start = $lexer->buffer->getStart(); $hasLeadingSpace = true; } if ($lexer->probe(Rule::END_OF_LINE)) { - $start ??= $lexer->getStartPosition(); + $start ??= $lexer->buffer->getStart(); $hasLeadingSpace = false; } @@ -79,13 +79,13 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo $rule = $lexer->readOneOf(self::$RULES_CONTENT); if ($rule === Rule::TEXT) { - $start ??= $lexer->getStartPosition(); + $start ??= $lexer->buffer->getStart(); if ($hasTrailingSpace) { $value .= ' '; $hasTrailingSpace = false; $trailingSpaceContainsLineBreaks = false; } - $value .= $lexer->getBuffer(); + $value .= $lexer->buffer->getContents(); continue; } @@ -100,7 +100,7 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo return null; } - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); if ($hasTrailingSpace && !$trailingSpaceContainsLineBreaks && !$lexer->isEnd() && !$lexer->peek(Rule::SYMBOL_CLOSE_TAG)) { $value 
.= ' '; diff --git a/src/Language/Parser/TypeReference/TypeReferenceParser.php b/src/Language/Parser/TypeReference/TypeReferenceParser.php index 99001a6..d23b256 100644 --- a/src/Language/Parser/TypeReference/TypeReferenceParser.php +++ b/src/Language/Parser/TypeReference/TypeReferenceParser.php @@ -44,11 +44,11 @@ public function parse(Lexer $lexer): TypeReferenceNode { $this->start = null; if ($isOptional = $lexer->probe(Rule::SYMBOL_QUESTIONMARK)) { - $this->start = $lexer->getStartPosition(); + $this->start = $lexer->buffer->getStart(); } $typeNameNodes = $this->parseTypeNames($lexer); $isArray = $this->parseIsArray($lexer); - $end = $lexer->getEndPosition(); + $end = $lexer->buffer->getEnd(); assert($this->start !== null); @@ -85,11 +85,11 @@ public function parseTypeNames(Lexer $lexer): TypeNameNodes public function parseTypeName(Lexer $lexer): TypeNameNode { $lexer->read(Rule::WORD); - $this->start ??= $lexer->getStartPosition(); + $this->start ??= $lexer->buffer->getStart(); return new TypeNameNode( - rangeInSource: $lexer->getCursorRange(), - value: TypeName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + value: TypeName::from($lexer->buffer->getContents()) ); } diff --git a/src/Language/Parser/ValueReference/ValueReferenceParser.php b/src/Language/Parser/ValueReference/ValueReferenceParser.php index 0c5b15e..ffb2ea0 100644 --- a/src/Language/Parser/ValueReference/ValueReferenceParser.php +++ b/src/Language/Parser/ValueReference/ValueReferenceParser.php @@ -37,8 +37,8 @@ public function parse(Lexer $lexer): ValueReferenceNode $lexer->read(Rule::WORD); return new ValueReferenceNode( - rangeInSource: $lexer->getCursorRange(), - name: VariableName::from($lexer->getBuffer()) + rangeInSource: $lexer->buffer->getRange(), + name: VariableName::from($lexer->buffer->getContents()) ); } } diff --git a/test/Unit/Language/Lexer/LexerTest.php b/test/Unit/Language/Lexer/LexerTest.php index 2f79405..a4899fb 100644 --- 
a/test/Unit/Language/Lexer/LexerTest.php +++ b/test/Unit/Language/Lexer/LexerTest.php @@ -42,19 +42,19 @@ protected function assertLexerState( ): void { $this->assertEquals( $startPosition, - $this->lexer->getStartPosition(), + $this->lexer->buffer->getStart(), 'Failed asserting that start position of lexer equals' ); $this->assertEquals( $endPosition, - $this->lexer->getEndPosition(), + $this->lexer->buffer->getEnd(), 'Failed asserting that end position of lexer equals' ); $this->assertEquals( $buffer, - $this->lexer->getBuffer(), + $this->lexer->buffer->getContents(), 'Failed asserting that buffer of lexer equals' ); From 6ce0e9a242be24f955e2dc48bfa3a6f20cc16ff6 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Thu, 17 Aug 2023 17:33:04 +0200 Subject: [PATCH 12/19] TASK: Remove superfluous classes under CharacterStream\\* --- .../CharacterStreamSnapshot.php | 35 -------- src/Language/Lexer/CharacterStream/Cursor.php | 82 ------------------- .../Lexer/CharacterStream/CursorSnapshot.php | 37 --------- src/Language/Lexer/Lexer.php | 3 - 4 files changed, 157 deletions(-) delete mode 100644 src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php delete mode 100644 src/Language/Lexer/CharacterStream/Cursor.php delete mode 100644 src/Language/Lexer/CharacterStream/CursorSnapshot.php diff --git a/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php b/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php deleted file mode 100644 index 4b2c364..0000000 --- a/src/Language/Lexer/CharacterStream/CharacterStreamSnapshot.php +++ /dev/null @@ -1,35 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream; - -/** - * @internal - */ -final class CharacterStreamSnapshot -{ - public function __construct( - public readonly int $byte, - public readonly ?string $characterUnderCursor = null - ) { - } -} diff --git a/src/Language/Lexer/CharacterStream/Cursor.php b/src/Language/Lexer/CharacterStream/Cursor.php deleted file mode 100644 index f2bf00b..0000000 --- a/src/Language/Lexer/CharacterStream/Cursor.php +++ /dev/null @@ -1,82 +0,0 @@ -. - */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream; - -use PackageFactory\ComponentEngine\Parser\Source\Position; - -/** - * @internal - */ -final class Cursor -{ - private int $currentLineNumber = 0; - private int $currentColumnNumber = 0; - private int $previousLineNumber = -1; - private int $previousColumnNumber = -1; - - public function advance(?string $character): void - { - if ($character !== null) { - $this->previousLineNumber = $this->currentLineNumber; - $this->previousColumnNumber = $this->currentColumnNumber; - - if ($character === "\n") { - $this->currentLineNumber++; - $this->currentColumnNumber = 0; - } else { - $this->currentColumnNumber++; - } - } - } - - public function getCurrentPosition(): Position - { - return new Position($this->currentLineNumber, $this->currentColumnNumber); - } - - public function getPreviousPosition(): Position - { - assert($this->previousLineNumber >= 0); - assert($this->previousColumnNumber >= 0); - - return new Position($this->previousLineNumber, $this->previousColumnNumber); - } - - public function makeSnapshot(): CursorSnapshot - { - return new CursorSnapshot( - currentLineNumber: $this->currentLineNumber, - currentColumnNumber: $this->currentColumnNumber, - previousLineNumber: $this->previousLineNumber, - previousColumnNumber: $this->previousColumnNumber - ); - } - - public function restoreSnapshot(CursorSnapshot $snapshot): void 
- { - $this->currentLineNumber = $snapshot->currentLineNumber; - $this->currentColumnNumber = $snapshot->currentColumnNumber; - $this->previousLineNumber = $snapshot->previousLineNumber; - $this->previousColumnNumber = $snapshot->previousColumnNumber; - } -} diff --git a/src/Language/Lexer/CharacterStream/CursorSnapshot.php b/src/Language/Lexer/CharacterStream/CursorSnapshot.php deleted file mode 100644 index eadc09b..0000000 --- a/src/Language/Lexer/CharacterStream/CursorSnapshot.php +++ /dev/null @@ -1,37 +0,0 @@ -. - */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream; - -/** - * @internal - */ -final class CursorSnapshot -{ - public function __construct( - public readonly int $currentLineNumber, - public readonly int $currentColumnNumber, - public readonly int $previousLineNumber, - public readonly int $previousColumnNumber - ) { - } -} diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index dbe9afe..5fb55d5 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -24,12 +24,9 @@ use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\RuleInterface; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Lexer\Scanner\Scanner; use PackageFactory\ComponentEngine\Language\Lexer\Scanner\ScannerException; -use PackageFactory\ComponentEngine\Parser\Source\Position; -use PackageFactory\ComponentEngine\Parser\Source\Range; final class Lexer { From 714673bc8f34f04e1505656ca01f6eda509eb970 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Thu, 17 Aug 2023 17:59:11 +0200 Subject: [PATCH 13/19] TASK: Remove Rules class --- src/Language/Lexer/Lexer.php | 50 +++++----- src/Language/Lexer/LexerException.php | 21 +++- src/Language/Lexer/Rule/Rules.php | 51 ---------- 
.../BooleanLiteral/BooleanLiteralParser.php | 15 +-- .../ComponentDeclarationParser.php | 15 +-- .../EnumDeclaration/EnumDeclarationParser.php | 21 ++-- src/Language/Parser/Export/ExportParser.php | 17 ++-- .../Parser/Expression/ExpressionParser.php | 88 ++++++++--------- src/Language/Parser/Import/ImportParser.php | 13 --- .../IntegerLiteral/IntegerLiteralParser.php | 34 ++----- src/Language/Parser/Tag/TagParser.php | 15 +-- src/Language/Parser/Text/TextParser.php | 33 +++---- src/Language/Util/DebugHelper.php | 11 +-- test/Unit/Language/Lexer/LexerTest.php | 96 +++++++++---------- .../Parser/Export/ExportParserTest.php | 4 +- .../IntegerLiteralParserTest.php | 8 +- 16 files changed, 188 insertions(+), 304 deletions(-) delete mode 100644 src/Language/Lexer/Rule/Rules.php diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index 5fb55d5..7d3f229 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -24,14 +24,20 @@ use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Lexer\Scanner\Scanner; use PackageFactory\ComponentEngine\Language\Lexer\Scanner\ScannerException; final class Lexer { - private static Rules $RULES_SPACE; - private static Rules $RULES_SPACE_AND_COMMENTS; + private const RULES_SPACE = [ + Rule::SPACE, + Rule::END_OF_LINE + ]; + private const RULES_SPACE_AND_COMMENTS = [ + Rule::SPACE, + Rule::END_OF_LINE, + Rule::COMMENT + ]; private readonly Scanner $scanner; @@ -39,16 +45,6 @@ final class Lexer public function __construct(string $source) { - self::$RULES_SPACE = Rules::from( - Rule::SPACE, - Rule::END_OF_LINE - ); - self::$RULES_SPACE_AND_COMMENTS = Rules::from( - Rule::SPACE, - Rule::END_OF_LINE, - Rule::COMMENT - ); - $this->scanner = new Scanner($source); $this->buffer = $this->scanner->getBuffer(); } @@ -76,22 +72,22 @@ 
public function read(Rule $rule): void if ($this->scanner->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: Rules::from($rule), + expectedRules: [$rule], affectedRangeInSource: $this->scanner->getBuffer()->getRange() ); } throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: Rules::from($rule), + expectedRules: [$rule], affectedRangeInSource: $this->scanner->getBuffer()->getRange(), actualCharacterSequence: $this->scanner->getBuffer()->getContents() ); } /** @phpstan-impure */ - public function readOneOf(Rules $rules): Rule + public function readOneOf(Rule ...$rules): Rule { - if ($rule = $this->scanner->scanOneOf(...$rules->items)) { + if ($rule = $this->scanner->scanOneOf(...$rules)) { $this->scanner->commit(); assert($rule instanceof Rule); return $rule; @@ -123,9 +119,9 @@ public function probe(Rule $rule): bool } /** @phpstan-impure */ - public function probeOneOf(Rules $rules): ?Rule + public function probeOneOf(Rule ...$rules): ?Rule { - if ($rule = $this->scanner->scanOneOf(...$rules->items)) { + if ($rule = $this->scanner->scanOneOf(...$rules)) { $this->scanner->commit(); assert($rule instanceof Rule); return $rule; @@ -144,9 +140,9 @@ public function peek(Rule $rule): bool } /** @phpstan-impure */ - public function peekOneOf(Rules $rules): ?Rule + public function peekOneOf(Rule ...$rules): ?Rule { - $rule = $this->scanner->scanOneOf(...$rules->items); + $rule = $this->scanner->scanOneOf(...$rules); $this->scanner->dismiss(); assert($rule === null || $rule instanceof Rule); @@ -157,14 +153,14 @@ public function expect(Rule $rule): void { if ($this->scanner->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: Rules::from($rule), + expectedRules: [$rule], affectedRangeInSource: $this->scanner->getBuffer()->getRange() ); } if (!$this->scanner->scan($rule)) { throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: Rules::from($rule), + expectedRules: 
[$rule], affectedRangeInSource: $this->scanner->getBuffer()->getRange(), actualCharacterSequence: $this->scanner->getBuffer()->getContents() ); @@ -174,7 +170,7 @@ public function expect(Rule $rule): void } /** @phpstan-impure */ - public function expectOneOf(Rules $rules): Rule + public function expectOneOf(Rule ...$rules): Rule { if ($this->scanner->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( @@ -183,7 +179,7 @@ public function expectOneOf(Rules $rules): Rule ); } - if ($rule = $this->scanner->scanOneOf(...$rules->items)) { + if ($rule = $this->scanner->scanOneOf(...$rules)) { $this->scanner->dismiss(); assert($rule instanceof Rule); return $rule; @@ -198,7 +194,7 @@ public function expectOneOf(Rules $rules): Rule public function skipSpace(): void { - while ($this->scanner->scanOneOf(...self::$RULES_SPACE->items)) { + while ($this->scanner->scanOneOf(...self::RULES_SPACE)) { $this->scanner->commit(); } @@ -211,7 +207,7 @@ public function skipSpace(): void public function skipSpaceAndComments(): void { - while ($this->scanner->scanOneOf(...self::$RULES_SPACE_AND_COMMENTS->items)) { + while ($this->scanner->scanOneOf(...self::RULES_SPACE_AND_COMMENTS)) { $this->scanner->commit(); } diff --git a/src/Language/Lexer/LexerException.php b/src/Language/Lexer/LexerException.php index aa2785a..3ae3650 100644 --- a/src/Language/Lexer/LexerException.php +++ b/src/Language/Lexer/LexerException.php @@ -23,7 +23,7 @@ namespace PackageFactory\ComponentEngine\Language\Lexer; use Exception; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; +use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; use PackageFactory\ComponentEngine\Language\Lexer\Scanner\ScannerException; use PackageFactory\ComponentEngine\Language\Util\DebugHelper; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -46,22 +46,33 @@ private function __construct( parent::__construct($message, $code, $cause); } + /** + * @param Rule[] $expectedRules + * @param Range 
$affectedRangeInSource + * @return self + */ public static function becauseOfUnexpectedEndOfSource( - Rules $expectedRules, + array $expectedRules, Range $affectedRangeInSource ): self { return new self( code: 1691489789, message: sprintf( 'Source ended unexpectedly. Expected %s instead.', - DebugHelper::describeRules($expectedRules) + DebugHelper::describeRules(...$expectedRules) ), affectedRangeInSource: $affectedRangeInSource ); } + /** + * @param Rule[] $expectedRules + * @param Range $affectedRangeInSource + * @param string $actualCharacterSequence + * @return self + */ public static function becauseOfUnexpectedCharacterSequence( - Rules $expectedRules, + array $expectedRules, Range $affectedRangeInSource, string $actualCharacterSequence ): self { @@ -70,7 +81,7 @@ public static function becauseOfUnexpectedCharacterSequence( message: sprintf( 'Unexpected character sequence "%s" was encountered. Expected %s instead.', $actualCharacterSequence, - DebugHelper::describeRules($expectedRules) + DebugHelper::describeRules(...$expectedRules) ), affectedRangeInSource: $affectedRangeInSource ); diff --git a/src/Language/Lexer/Rule/Rules.php b/src/Language/Lexer/Rule/Rules.php deleted file mode 100644 index 947e2ed..0000000 --- a/src/Language/Lexer/Rule/Rules.php +++ /dev/null @@ -1,51 +0,0 @@ -. 
- */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Language\Lexer\Rule; - -final class Rules -{ - /** - * @var Rule[] - */ - public readonly array $items; - - private function __construct(Rule ...$items) - { - assert(count($items) > 0); - - $this->items = $items; - } - - public static function from(Rule ...$items): self - { - $items = array_unique($items, SORT_REGULAR); - $items = array_values($items); - - return new self(...$items); - } - - public function contains(Rule $needle): bool - { - return in_array($needle, $this->items); - } -} diff --git a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php index 1e3856d..bed1053 100644 --- a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php +++ b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php @@ -32,19 +32,14 @@ final class BooleanLiteralParser { use Singleton; - private static Rules $RULES_BOOLEAN_KEYWORDS; - - private function __construct() - { - self::$RULES_BOOLEAN_KEYWORDS ??= Rules::from( - Rule::KEYWORD_TRUE, - Rule::KEYWORD_FALSE - ); - } + private const RULES_BOOLEAN_KEYWORDS = [ + Rule::KEYWORD_TRUE, + Rule::KEYWORD_FALSE + ]; public function parse(Lexer $lexer): BooleanLiteralNode { - $rule = $lexer->readOneOf(self::$RULES_BOOLEAN_KEYWORDS); + $rule = $lexer->readOneOf(...self::RULES_BOOLEAN_KEYWORDS); return new BooleanLiteralNode( rangeInSource: $lexer->buffer->getRange(), diff --git a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php index f3ad5af..13a615a 100644 --- a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php +++ b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php @@ -39,19 +39,14 @@ final class ComponentDeclarationParser { use Singleton; - private static Rules $RULES_SPACE; + private const RULES_SPACE = [ + Rule::SPACE, + Rule::END_OF_LINE + 
]; private ?PropertyDeclarationParser $propertyDeclarationParser = null; private ?ExpressionParser $returnParser = null; - private function __construct() - { - self::$RULES_SPACE ??= Rules::from( - Rule::SPACE, - Rule::END_OF_LINE - ); - } - public function parse(Lexer $lexer): ComponentDeclarationNode { $lexer->read(Rule::KEYWORD_COMPONENT); @@ -108,7 +103,7 @@ private function parseReturn(Lexer $lexer): ExpressionNode $this->returnParser ??= new ExpressionParser(); $lexer->read(Rule::KEYWORD_RETURN); - $lexer->readOneOf(self::$RULES_SPACE); + $lexer->readOneOf(...self::RULES_SPACE); $lexer->skipSpaceAndComments(); return $this->returnParser->parse($lexer); diff --git a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php index 906dbc8..97ccbae 100644 --- a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php +++ b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php @@ -44,22 +44,17 @@ final class EnumDeclarationParser { use Singleton; - private static Rules $RULES_ENUM_MEMBER_VALUE_START; + private const RULES_ENUM_MEMBER_VALUE_START = [ + Rule::STRING_LITERAL_DELIMITER, + Rule::INTEGER_BINARY, + Rule::INTEGER_OCTAL, + Rule::INTEGER_DECIMAL, + Rule::INTEGER_HEXADECIMAL + ]; private ?StringLiteralParser $stringLiteralParser = null; private ?IntegerLiteralParser $integerLiteralParser = null; - private function __construct() - { - self::$RULES_ENUM_MEMBER_VALUE_START ??= Rules::from( - Rule::STRING_LITERAL_DELIMITER, - Rule::INTEGER_BINARY, - Rule::INTEGER_OCTAL, - Rule::INTEGER_DECIMAL, - Rule::INTEGER_HEXADECIMAL - ); - } - public function parse(Lexer $lexer): EnumDeclarationNode { $lexer->read(Rule::KEYWORD_ENUM); @@ -138,7 +133,7 @@ private function parseEnumMemberValue(Lexer $lexer): ?EnumMemberValueNode if ($lexer->probe(Rule::BRACKET_ROUND_OPEN)) { $start = $lexer->buffer->getStart(); - $value = match ($lexer->expectOneOf(self::$RULES_ENUM_MEMBER_VALUE_START)) { + 
$value = match ($lexer->expectOneOf(...self::RULES_ENUM_MEMBER_VALUE_START)) { Rule::STRING_LITERAL_DELIMITER => $this->parseStringLiteral($lexer), default => diff --git a/src/Language/Parser/Export/ExportParser.php b/src/Language/Parser/Export/ExportParser.php index b4dad11..b7b788a 100644 --- a/src/Language/Parser/Export/ExportParser.php +++ b/src/Language/Parser/Export/ExportParser.php @@ -41,21 +41,16 @@ final class ExportParser { use Singleton; - private static Rules $RULES_DECLARATION_KEYWORDS; + private const RULES_DECLARATION_KEYWORDS = [ + Rule::KEYWORD_COMPONENT, + Rule::KEYWORD_ENUM, + Rule::KEYWORD_STRUCT + ]; private ?ComponentDeclarationParser $componentDeclarationParser = null; private ?EnumDeclarationParser $enumDeclarationParser = null; private ?StructDeclarationParser $structDeclarationParser = null; - private function __construct() - { - self::$RULES_DECLARATION_KEYWORDS ??= Rules::from( - Rule::KEYWORD_COMPONENT, - Rule::KEYWORD_ENUM, - Rule::KEYWORD_STRUCT - ); - } - public function parse(Lexer $lexer): ExportNode { try { @@ -64,7 +59,7 @@ public function parse(Lexer $lexer): ExportNode $lexer->skipSpace(); - $declaration = match ($lexer->expectOneOf(self::$RULES_DECLARATION_KEYWORDS)) { + $declaration = match ($lexer->expectOneOf(...self::RULES_DECLARATION_KEYWORDS)) { Rule::KEYWORD_COMPONENT => $this->parseComponentDeclaration($lexer), Rule::KEYWORD_ENUM => $this->parseEnumDeclaration($lexer), Rule::KEYWORD_STRUCT => $this->parseStructDeclaration($lexer), diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index 26c6f8c..19323ea 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -48,10 +48,42 @@ final class ExpressionParser { - private static Rules $RULES_ACCESS; - private static Rules $RULES_BINARY_OPERATORS; - private static Rules $RULES_UNARY; - private static Rules $RULES_CLOSING_DELIMITERS; + 
private const RULES_ACCESS = [ + Rule::SYMBOL_PERIOD, + Rule::SYMBOL_OPTCHAIN + ]; + private const RULES_BINARY_OPERATORS = [ + Rule::SYMBOL_NULLISH_COALESCE, + Rule::SYMBOL_BOOLEAN_AND, + Rule::SYMBOL_BOOLEAN_OR, + Rule::SYMBOL_STRICT_EQUALS, + Rule::SYMBOL_NOT_EQUALS, + Rule::SYMBOL_GREATER_THAN, + Rule::SYMBOL_LESS_THAN + ]; + private const RULES_UNARY = [ + Rule::SYMBOL_EXCLAMATIONMARK, + Rule::KEYWORD_TRUE, + Rule::KEYWORD_FALSE, + Rule::KEYWORD_NULL, + Rule::KEYWORD_MATCH, + Rule::STRING_LITERAL_DELIMITER, + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + Rule::INTEGER_OCTAL, + Rule::INTEGER_BINARY, + Rule::WORD, + Rule::BRACKET_ANGLE_OPEN, + Rule::BRACKET_ROUND_OPEN + ]; + private const RULES_CLOSING_DELIMITERS = [ + Rule::BRACKET_CURLY_OPEN, + Rule::BRACKET_CURLY_CLOSE, + Rule::BRACKET_ROUND_CLOSE, + Rule::SYMBOL_COLON, + Rule::SYMBOL_COMMA, + Rule::SYMBOL_ARROW_SINGLE + ]; private ?BooleanLiteralParser $booleanLiteralParser = null; private ?IntegerLiteralParser $integerLiteralParser = null; @@ -65,42 +97,6 @@ final class ExpressionParser public function __construct( private Precedence $precedence = Precedence::SEQUENCE ) { - self::$RULES_ACCESS ??= Rules::from( - Rule::SYMBOL_PERIOD, - Rule::SYMBOL_OPTCHAIN - ); - self::$RULES_BINARY_OPERATORS ??= Rules::from( - Rule::SYMBOL_NULLISH_COALESCE, - Rule::SYMBOL_BOOLEAN_AND, - Rule::SYMBOL_BOOLEAN_OR, - Rule::SYMBOL_STRICT_EQUALS, - Rule::SYMBOL_NOT_EQUALS, - Rule::SYMBOL_GREATER_THAN, - Rule::SYMBOL_LESS_THAN - ); - self::$RULES_UNARY ??= Rules::from( - Rule::SYMBOL_EXCLAMATIONMARK, - Rule::KEYWORD_TRUE, - Rule::KEYWORD_FALSE, - Rule::KEYWORD_NULL, - Rule::KEYWORD_MATCH, - Rule::STRING_LITERAL_DELIMITER, - Rule::INTEGER_HEXADECIMAL, - Rule::INTEGER_DECIMAL, - Rule::INTEGER_OCTAL, - Rule::INTEGER_BINARY, - Rule::WORD, - Rule::BRACKET_ANGLE_OPEN, - Rule::BRACKET_ROUND_OPEN - ); - self::$RULES_CLOSING_DELIMITERS = Rules::from( - Rule::BRACKET_CURLY_OPEN, - Rule::BRACKET_CURLY_CLOSE, - 
Rule::BRACKET_ROUND_CLOSE, - Rule::SYMBOL_COLON, - Rule::SYMBOL_COMMA, - Rule::SYMBOL_ARROW_SINGLE - ); } public function parse(Lexer $lexer): ExpressionNode @@ -110,11 +106,11 @@ public function parse(Lexer $lexer): ExpressionNode while (!$lexer->isEnd()) { $lexer->skipSpaceAndComments(); - if ($lexer->peekOneOf(self::$RULES_CLOSING_DELIMITERS)) { + if ($lexer->peekOneOf(...self::RULES_CLOSING_DELIMITERS)) { return $result; } - if ($lexer->peekOneOf(self::$RULES_ACCESS)) { + if ($lexer->peekOneOf(...self::RULES_ACCESS)) { $result = $this->parseAccess($lexer, $result); continue; } @@ -128,7 +124,7 @@ public function parse(Lexer $lexer): ExpressionNode continue; } - if ($rule = $lexer->peekOneOf(self::$RULES_BINARY_OPERATORS)) { + if ($rule = $lexer->peekOneOf(...self::RULES_BINARY_OPERATORS)) { assert($rule instanceof Rule); if ($this->precedence->mustStopAt($rule)) { return $result; @@ -149,7 +145,7 @@ private function parseUnaryStatement(Lexer $lexer): ExpressionNode if ($lexer->peek(Rule::TEMPLATE_LITERAL_DELIMITER)) { $result = $this->parseTemplateLiteral($lexer); } else { - $result = match ($lexer->expectOneOf(self::$RULES_UNARY)) { + $result = match ($lexer->expectOneOf(...self::RULES_UNARY)) { Rule::SYMBOL_EXCLAMATIONMARK => $this->parseUnaryOperation($lexer), Rule::KEYWORD_TRUE, @@ -360,7 +356,7 @@ private function parseAccess(Lexer $lexer, ExpressionNode $parent): ExpressionNo private function parseAccessType(Lexer $lexer): ?AccessType { - return match ($lexer->probeOneOf(self::$RULES_ACCESS)) { + return match ($lexer->probeOneOf(...self::RULES_ACCESS)) { Rule::SYMBOL_PERIOD => AccessType::MANDATORY, Rule::SYMBOL_OPTCHAIN => AccessType::OPTIONAL, default => null @@ -401,7 +397,7 @@ private function parseBinaryOperator(Lexer $lexer): BinaryOperator return BinaryOperator::LESS_THAN_OR_EQUAL; } - $operator = match ($lexer->readOneOf(self::$RULES_BINARY_OPERATORS)) { + $operator = match ($lexer->readOneOf(...self::RULES_BINARY_OPERATORS)) { 
Rule::SYMBOL_NULLISH_COALESCE => BinaryOperator::NULLISH_COALESCE, Rule::SYMBOL_BOOLEAN_AND => BinaryOperator::AND, Rule::SYMBOL_BOOLEAN_OR => BinaryOperator::OR, diff --git a/src/Language/Parser/Import/ImportParser.php b/src/Language/Parser/Import/ImportParser.php index b5b6f87..61ec73d 100644 --- a/src/Language/Parser/Import/ImportParser.php +++ b/src/Language/Parser/Import/ImportParser.php @@ -31,9 +31,7 @@ use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Token; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -41,19 +39,8 @@ final class ImportParser { use Singleton; - private static Rules $RULES_NAME_BOUNDARIES; - private ?StringLiteralParser $pathParser = null; - private function __construct() - { - self::$RULES_NAME_BOUNDARIES ??= Rules::from( - Rule::WORD, - Rule::SYMBOL_COMMA, - Rule::BRACKET_CURLY_CLOSE - ); - } - public function parse(Lexer $lexer): ImportNode { try { diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index 9912a58..6eb5a64 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -22,37 +22,28 @@ namespace PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral; -use LogicException; use PackageFactory\ComponentEngine\Framework\PHP\Singleton\Singleton; use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerFormat; use PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral\IntegerLiteralNode; use 
PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; -use PackageFactory\ComponentEngine\Language\Util\DebugHelper; -use PackageFactory\ComponentEngine\Parser\Source\Range; final class IntegerLiteralParser { use Singleton; - private static Rules $INTEGER_TOKEN_TYPES; - - private function __construct() - { - self::$INTEGER_TOKEN_TYPES ??= Rules::from( - Rule::INTEGER_HEXADECIMAL, - Rule::INTEGER_DECIMAL, - Rule::INTEGER_OCTAL, - Rule::INTEGER_BINARY - ); - } + private const RULES_INTEGER_FORMATS = [ + Rule::INTEGER_HEXADECIMAL, + Rule::INTEGER_DECIMAL, + Rule::INTEGER_OCTAL, + Rule::INTEGER_BINARY + ]; public function parse(Lexer $lexer): IntegerLiteralNode { try { - $rule = $lexer->readOneOf(self::$INTEGER_TOKEN_TYPES); + $rule = $lexer->readOneOf(...self::RULES_INTEGER_FORMATS); return new IntegerLiteralNode( rangeInSource: $lexer->buffer->getRange(), @@ -67,17 +58,10 @@ public function parse(Lexer $lexer): IntegerLiteralNode private function getIntegerFormatFromToken(Rule $rule): IntegerFormat { return match ($rule) { + Rule::INTEGER_HEXADECIMAL => IntegerFormat::HEXADECIMAL, Rule::INTEGER_BINARY => IntegerFormat::BINARY, Rule::INTEGER_OCTAL => IntegerFormat::OCTAL, - Rule::INTEGER_DECIMAL => IntegerFormat::DECIMAL, - Rule::INTEGER_HEXADECIMAL => IntegerFormat::HEXADECIMAL, - default => throw new LogicException( - sprintf( - 'Expected %s to be one of %s', - $rule->value, - DebugHelper::describeRules($this->INTEGER_TOKEN_TYPES) - ) - ) + default => IntegerFormat::DECIMAL, }; } } diff --git a/src/Language/Parser/Tag/TagParser.php b/src/Language/Parser/Tag/TagParser.php index 34aebff..cbbf649 100644 --- a/src/Language/Parser/Tag/TagParser.php +++ b/src/Language/Parser/Tag/TagParser.php @@ -47,20 +47,15 @@ final class TagParser { use Singleton; - private static Rules 
$RULES_ATTRIBUTE_DELIMITERS; + private const RULES_ATTRIBUTE_DELIMITERS = [ + Rule::STRING_LITERAL_DELIMITER, + Rule::BRACKET_CURLY_OPEN + ]; private ?StringLiteralParser $stringLiteralParser = null; private ?TextParser $textParser = null; private ?ExpressionParser $expressionParser = null; - private function __construct() - { - self::$RULES_ATTRIBUTE_DELIMITERS ??= Rules::from( - Rule::STRING_LITERAL_DELIMITER, - Rule::BRACKET_CURLY_OPEN - ); - } - public function parse(Lexer $lexer): TagNode { $lexer->read(Rule::BRACKET_ANGLE_OPEN); @@ -153,7 +148,7 @@ private function parseAttributeName(Lexer $lexer): AttributeNameNode private function parseAttributeValue(Lexer $lexer): null|StringLiteralNode|ExpressionNode { if ($lexer->probe(Rule::SYMBOL_EQUALS)) { - return match ($lexer->expectOneOf(self::$RULES_ATTRIBUTE_DELIMITERS)) { + return match ($lexer->expectOneOf(...self::RULES_ATTRIBUTE_DELIMITERS)) { Rule::STRING_LITERAL_DELIMITER => $this->parseString($lexer), Rule::BRACKET_CURLY_OPEN => diff --git a/src/Language/Parser/Text/TextParser.php b/src/Language/Parser/Text/TextParser.php index 5bf175c..b2b1c8d 100644 --- a/src/Language/Parser/Text/TextParser.php +++ b/src/Language/Parser/Text/TextParser.php @@ -26,7 +26,6 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Text\TextNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -34,22 +33,16 @@ final class TextParser { use Singleton; - private static Rules $RULES_END_DELIMITERS; - private static Rules $RULES_CONTENT; - - private function __construct() - { - self::$RULES_END_DELIMITERS = Rules::from( - Rule::SYMBOL_CLOSE_TAG, - Rule::BRACKET_ANGLE_OPEN, - Rule::BRACKET_CURLY_OPEN - ); - self::$RULES_CONTENT = Rules::from( - Rule::SPACE, - Rule::END_OF_LINE, - 
Rule::TEXT - ); - } + private const RULES_END_DELIMITERS = [ + Rule::SYMBOL_CLOSE_TAG, + Rule::BRACKET_ANGLE_OPEN, + Rule::BRACKET_CURLY_OPEN + ]; + private const RULES_CONTENT = [ + Rule::SPACE, + Rule::END_OF_LINE, + Rule::TEXT + ]; public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNode { @@ -68,15 +61,15 @@ public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo } $lexer->skipSpace(); - if ($lexer->isEnd() || $lexer->peekOneOf(self::$RULES_END_DELIMITERS)) { + if ($lexer->isEnd() || $lexer->peekOneOf(...self::RULES_END_DELIMITERS)) { return null; } $hasTrailingSpace = false; $trailingSpaceContainsLineBreaks = false; $value = $hasLeadingSpace && $preserveLeadingSpace ? ' ' : ''; - while (!$lexer->isEnd() && !$lexer->peekOneOf(self::$RULES_END_DELIMITERS)) { - $rule = $lexer->readOneOf(self::$RULES_CONTENT); + while (!$lexer->isEnd() && !$lexer->peekOneOf(...self::RULES_END_DELIMITERS)) { + $rule = $lexer->readOneOf(...self::RULES_CONTENT); if ($rule === Rule::TEXT) { $start ??= $lexer->buffer->getStart(); diff --git a/src/Language/Util/DebugHelper.php b/src/Language/Util/DebugHelper.php index 2c6276b..0e164fd 100644 --- a/src/Language/Util/DebugHelper.php +++ b/src/Language/Util/DebugHelper.php @@ -34,7 +34,6 @@ use PackageFactory\ComponentEngine\Language\AST\Node\TernaryOperation\TernaryOperationNode; use PackageFactory\ComponentEngine\Language\AST\Node\ValueReference\ValueReferenceNode; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; final class DebugHelper { @@ -111,14 +110,14 @@ public static function describeRule(Rule $rule): string }; } - public static function describeRules(Rules $rules): string + public static function describeRules(Rule ...$rules): string { - if (count($rules->items) === 1) { - return self::describeRule($rules->items[0]); + if (count($rules) === 1) { + return self::describeRule($rules[0]); } - $leadingItems = 
array_slice($rules->items, 0, -1); - $trailingItem = array_slice($rules->items, -1)[0]; + $leadingItems = array_slice($rules, 0, -1); + $trailingItem = array_slice($rules, -1)[0]; return join(', ', array_map( static fn (Rule $rule) => self::describeRule($rule), diff --git a/test/Unit/Language/Lexer/LexerTest.php b/test/Unit/Language/Lexer/LexerTest.php index a4899fb..0daeb72 100644 --- a/test/Unit/Language/Lexer/LexerTest.php +++ b/test/Unit/Language/Lexer/LexerTest.php @@ -25,7 +25,6 @@ use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Parser\Source\Position; use PackageFactory\ComponentEngine\Parser\Source\Range; use PHPUnit\Framework\TestCase; @@ -261,7 +260,7 @@ public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rule $ex public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rule $expectedRule): void { $this->lexer = new Lexer($source); - $this->lexer->readOneOf(Rules::from($expectedRule)); + $this->lexer->readOneOf($expectedRule); $this->assertLexerState( startPosition: Position::from(0, 0), @@ -272,13 +271,13 @@ public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rul } /** - * @return iterable + * @return iterable */ public static function multipleTokensExamples(): iterable { yield ($source = "# This is a comment\n# This is also a comment") => [ $source, - Rules::from(Rule::COMMENT, Rule::END_OF_LINE), + [Rule::COMMENT, Rule::END_OF_LINE], [[0, 0], [0, 18], Rule::COMMENT, '# This is a comment'], [[0, 19], [0, 19], Rule::END_OF_LINE, "\n"], [[1, 0], [1, 23], Rule::COMMENT, '# This is also a comment'], @@ -286,7 +285,7 @@ public static function multipleTokensExamples(): iterable yield ($source = "1765224, -0xAB89CD, true\nnull") => [ $source, - Rules::from( + [ 
Rule::SYMBOL_DASH, Rule::SYMBOL_COMMA, Rule::INTEGER_HEXADECIMAL, @@ -295,7 +294,7 @@ public static function multipleTokensExamples(): iterable Rule::END_OF_LINE, Rule::KEYWORD_TRUE, Rule::KEYWORD_NULL - ), + ], [[0, 0], [0, 6], Rule::INTEGER_DECIMAL, '1765224'], [[0, 7], [0, 7], Rule::SYMBOL_COMMA, ','], [[0, 8], [0, 8], Rule::SPACE, ' '], @@ -310,13 +309,13 @@ public static function multipleTokensExamples(): iterable yield ($source = '0b100101 892837 0xFFAAEE 0o75374') => [ $source, - Rules::from( + [ Rule::INTEGER_BINARY, Rule::INTEGER_OCTAL, Rule::INTEGER_HEXADECIMAL, Rule::INTEGER_DECIMAL, Rule::SPACE - ), + ], [[0, 0], [0, 7], Rule::INTEGER_BINARY, '0b100101'], [[0, 8], [0, 8], Rule::SPACE, ' '], [[0, 9], [0, 14], Rule::INTEGER_DECIMAL, '892837'], @@ -328,14 +327,14 @@ public static function multipleTokensExamples(): iterable yield ($source = '"This is a string literal with \\n escapes \\xB1 \\u5FA9 \\u{1343E}!"') => [ $source, - Rules::from( + [ Rule::STRING_LITERAL_DELIMITER, Rule::STRING_LITERAL_CONTENT, Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, Rule::ESCAPE_SEQUENCE_HEXADECIMAL, Rule::ESCAPE_SEQUENCE_UNICODE, Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT - ), + ], [[0, 0], [0, 0], Rule::STRING_LITERAL_DELIMITER, '"'], [[0, 1], [0, 30], Rule::STRING_LITERAL_CONTENT, 'This is a string literal with '], [[0, 31], [0, 32], Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER, '\\n'], @@ -357,7 +356,7 @@ public static function multipleTokensExamples(): iterable AFX; yield $source => [ $source, - Rules::from( + [ Rule::TEMPLATE_LITERAL_DELIMITER, Rule::SPACE, Rule::TEMPLATE_LITERAL_CONTENT, @@ -368,7 +367,7 @@ public static function multipleTokensExamples(): iterable Rule::END_OF_LINE, Rule::BRACKET_CURLY_OPEN, Rule::BRACKET_CURLY_CLOSE - ), + ], [[0, 0], [0, 2], Rule::TEMPLATE_LITERAL_DELIMITER, '"""'], [[0, 3], [0, 3], Rule::END_OF_LINE, "\n"], [[1, 0], [1, 3], Rule::SPACE, ' '], @@ -402,7 +401,7 @@ public static function multipleTokensExamples(): iterable AFX; yield $source => [ 
$source, - Rules::from( + [ Rule::BRACKET_ANGLE_OPEN, Rule::WORD, Rule::SPACE, @@ -415,7 +414,7 @@ public static function multipleTokensExamples(): iterable Rule::BRACKET_CURLY_OPEN, Rule::BRACKET_CURLY_CLOSE, Rule::SYMBOL_COLON - ), + ], [[0, 0], [0, 0], Rule::BRACKET_ANGLE_OPEN, '<'], [[0, 1], [0, 1], Rule::WORD, 'a'], [[0, 2], [0, 2], Rule::SPACE, ' '], @@ -465,7 +464,7 @@ public static function multipleTokensExamples(): iterable AFX; yield $source => [ $source, - Rules::from( + [ Rule::TEXT, Rule::BRACKET_CURLY_OPEN, Rule::BRACKET_CURLY_CLOSE, @@ -473,7 +472,7 @@ public static function multipleTokensExamples(): iterable Rule::END_OF_LINE, Rule::BRACKET_ANGLE_OPEN, Rule::BRACKET_ANGLE_CLOSE - ), + ], [[0, 0], [0, 30], Rule::TEXT, 'ThisIsSomeText-with-expressions'], [[0, 31], [0, 31], Rule::BRACKET_CURLY_OPEN, '{'], [[0, 32], [0, 32], Rule::BRACKET_CURLY_CLOSE, '}'], @@ -493,18 +492,19 @@ public static function multipleTokensExamples(): iterable * @dataProvider multipleTokensExamples * @test * @param string $source + * @param Rule[] $rules * @param array{array{int,int},array{int,int},Rule,string} ...$expectedLexerStates * @return void */ public function testReadOneOfWithMultipleRules( string $source, - Rules $rules, + array $rules, array ...$expectedLexerStates ): void { $this->lexer = new Lexer($source); foreach ($expectedLexerStates as $i => $expectedLexerState) { - $this->lexer->readOneOf($rules); + $this->lexer->readOneOf(...$rules); $this->assertLexerState( startPosition: Position::from(...$expectedLexerState[0]), @@ -661,7 +661,7 @@ function () use ($source, $expectedRule) { $this->lexer->read($expectedRule); }, LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: Rules::from($expectedRule), + expectedRules: [$expectedRule], affectedRangeInSource: $affectedRangeInSource, actualCharacterSequence: $actualTokenValue ) @@ -675,7 +675,7 @@ public static function failingMultipleTokensExamples(): iterable { yield ($source = "# This is a 
comment\nThis is not a comment") => [ $source, - $rules = Rules::from(Rule::COMMENT, Rule::END_OF_LINE), + $rules = [Rule::COMMENT, Rule::END_OF_LINE], 3, LexerException::becauseOfUnexpectedCharacterSequence( expectedRules: $rules, @@ -692,14 +692,14 @@ public static function failingMultipleTokensExamples(): iterable * @dataProvider failingMultipleTokensExamples * @test * @param string $source - * @param Rules $rules + * @param Rule[] $rules * @param integer $numberOfReadOperations * @param LexerException $expectedLexerException * @return void */ public function throwsIfCharacterSequenceDoesNotMatchMultipleRules( string $source, - Rules $rules, + array $rules, int $numberOfReadOperations, LexerException $expectedLexerException ): void { @@ -708,7 +708,7 @@ function () use ($source, $rules, $numberOfReadOperations) { $this->lexer = new Lexer($source); foreach(range(0, $numberOfReadOperations) as $i) { - $this->lexer->readOneOf($rules); + $this->lexer->readOneOf(...$rules); } }, $expectedLexerException @@ -726,7 +726,7 @@ function () { $this->lexer->read(Rule::KEYWORD_NULL); }, LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: Rules::from(Rule::KEYWORD_NULL), + expectedRules: [Rule::KEYWORD_NULL], affectedRangeInSource: Range::from( Position::from(0, 0), Position::from(0, 0) @@ -741,7 +741,7 @@ function () { $lexer->read(Rule::KEYWORD_NULL); }, LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: Rules::from(Rule::KEYWORD_NULL), + expectedRules: [Rule::KEYWORD_NULL], affectedRangeInSource: Range::from( Position::from(0, 0), Position::from(0, 4) @@ -757,11 +757,11 @@ public static function multipleRuleUnexpectedEndOfSourceExamples(): iterable { yield ($source = '') => [ $source, - $rules = Rules::from( + $rules = [ Rule::KEYWORD_RETURN, Rule::KEYWORD_NULL, Rule::SPACE - ), + ], 1, LexerException::becauseOfUnexpectedEndOfSource( expectedRules: $rules, @@ -774,11 +774,11 @@ public static function multipleRuleUnexpectedEndOfSourceExamples(): 
iterable yield ($source = 'return') => [ $source, - $rules = Rules::from( + $rules = [ Rule::KEYWORD_RETURN, Rule::KEYWORD_NULL, Rule::SPACE - ), + ], 2, LexerException::becauseOfUnexpectedEndOfSource( expectedRules: $rules, @@ -791,11 +791,11 @@ public static function multipleRuleUnexpectedEndOfSourceExamples(): iterable yield ($source = 'return ') => [ $source, - $rules = Rules::from( + $rules = [ Rule::KEYWORD_RETURN, Rule::KEYWORD_NULL, Rule::SPACE - ), + ], 3, LexerException::becauseOfUnexpectedEndOfSource( expectedRules: $rules, @@ -811,14 +811,14 @@ public static function multipleRuleUnexpectedEndOfSourceExamples(): iterable * @dataProvider multipleRuleUnexpectedEndOfSourceExamples * @test * @param string $source - * @param Rules $rules + * @param Rule[] $rules * @param integer $numberOfReadOperations * @param LexerException $expectedLexerException * @return void */ public function throwsIfSourceEndsUnexpectedlyWhileReadingMultipleRules( string $source, - Rules $rules, + array $rules, int $numberOfReadOperations, LexerException $expectedLexerException ): void { @@ -827,7 +827,7 @@ function () use ($source, $rules, $numberOfReadOperations) { $this->lexer = new Lexer($source); foreach(range(0, $numberOfReadOperations) as $i) { - $this->lexer->readOneOf($rules); + $this->lexer->readOneOf(...$rules); } }, $expectedLexerException @@ -856,9 +856,9 @@ public function skipsSpace(): void // Multiple $this->lexer = new Lexer('return ' . "\t\n\t" . 
' 42'); - $this->lexer->readOneOf(Rules::from(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL)); + $this->lexer->readOneOf(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL); $this->lexer->skipSpace(); - $this->lexer->readOneOf(Rules::from(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL)); + $this->lexer->readOneOf(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL); $this->assertLexerState( startPosition: Position::from(1, 4), @@ -902,27 +902,21 @@ public function skipsSpaceAndComments(): void // Multiple $this->lexer = new Lexer($source); $this->lexer->readOneOf( - Rules::from( - Rule::KEYWORD_IMPORT, - Rule::KEYWORD_EXPORT, - Rule::KEYWORD_COMPONENT - ) + Rule::KEYWORD_IMPORT, + Rule::KEYWORD_EXPORT, + Rule::KEYWORD_COMPONENT ); $this->lexer->skipSpaceAndComments(); $this->lexer->readOneOf( - Rules::from( - Rule::KEYWORD_IMPORT, - Rule::KEYWORD_EXPORT, - Rule::KEYWORD_COMPONENT - ) + Rule::KEYWORD_IMPORT, + Rule::KEYWORD_EXPORT, + Rule::KEYWORD_COMPONENT ); $this->lexer->skipSpaceAndComments(); $this->lexer->readOneOf( - Rules::from( - Rule::KEYWORD_IMPORT, - Rule::KEYWORD_EXPORT, - Rule::KEYWORD_COMPONENT - ) + Rule::KEYWORD_IMPORT, + Rule::KEYWORD_EXPORT, + Rule::KEYWORD_COMPONENT ); $this->assertLexerState( diff --git a/test/Unit/Language/Parser/Export/ExportParserTest.php b/test/Unit/Language/Parser/Export/ExportParserTest.php index 2ee37f0..d9fb1ef 100644 --- a/test/Unit/Language/Parser/Export/ExportParserTest.php +++ b/test/Unit/Language/Parser/Export/ExportParserTest.php @@ -190,11 +190,11 @@ function () { }, ExportCouldNotBeParsed::becauseOfLexerException( cause: LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: Rules::from( + expectedRules: [ Rule::KEYWORD_COMPONENT, Rule::KEYWORD_ENUM, Rule::KEYWORD_STRUCT - ), + ], affectedRangeInSource: $this->range([0, 7], [0, 7]), actualCharacterSequence: 'n' ), diff --git a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php 
index 147d7ba..583873d 100644 --- a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php +++ b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php @@ -128,12 +128,12 @@ function () { }, IntegerLiteralCouldNotBeParsed::becauseOfLexerException( cause: LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: Rules::from( + expectedRules: [ Rule::INTEGER_HEXADECIMAL, Rule::INTEGER_DECIMAL, Rule::INTEGER_OCTAL, Rule::INTEGER_BINARY - ), + ], affectedRangeInSource: $this->range([0, 0], [0, 0]) ) ) @@ -154,12 +154,12 @@ function () { }, IntegerLiteralCouldNotBeParsed::becauseOfLexerException( cause: LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: Rules::from( + expectedRules: [ Rule::INTEGER_HEXADECIMAL, Rule::INTEGER_DECIMAL, Rule::INTEGER_OCTAL, Rule::INTEGER_BINARY - ), + ], affectedRangeInSource: $this->range([0, 0], [0, 0]), actualCharacterSequence: 'f' ) From 1009734e14b026131528cd4de13dd3b3a7d93381 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 18 Aug 2023 15:10:10 +0200 Subject: [PATCH 14/19] TASK: Merge scan and scanOneOf methods into unified scan method --- src/Language/Lexer/Lexer.php | 14 +++--- src/Language/Lexer/Scanner/Scanner.php | 31 +----------- .../Lexer/Scanner/ScannerInterface.php | 3 +- .../Language/Lexer/Scanner/ScannerTest.php | 50 +++++++++---------- 4 files changed, 34 insertions(+), 64 deletions(-) diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index 7d3f229..d2eb68f 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -87,7 +87,7 @@ public function read(Rule $rule): void /** @phpstan-impure */ public function readOneOf(Rule ...$rules): Rule { - if ($rule = $this->scanner->scanOneOf(...$rules)) { + if ($rule = $this->scanner->scan(...$rules)) { $this->scanner->commit(); assert($rule instanceof Rule); return $rule; @@ -121,7 +121,7 @@ public function probe(Rule $rule): bool /** @phpstan-impure */ public function 
probeOneOf(Rule ...$rules): ?Rule { - if ($rule = $this->scanner->scanOneOf(...$rules)) { + if ($rule = $this->scanner->scan(...$rules)) { $this->scanner->commit(); assert($rule instanceof Rule); return $rule; @@ -136,13 +136,13 @@ public function peek(Rule $rule): bool $result = $this->scanner->scan($rule); $this->scanner->dismiss(); - return $result; + return (bool) $result; } /** @phpstan-impure */ public function peekOneOf(Rule ...$rules): ?Rule { - $rule = $this->scanner->scanOneOf(...$rules); + $rule = $this->scanner->scan(...$rules); $this->scanner->dismiss(); assert($rule === null || $rule instanceof Rule); @@ -179,7 +179,7 @@ public function expectOneOf(Rule ...$rules): Rule ); } - if ($rule = $this->scanner->scanOneOf(...$rules)) { + if ($rule = $this->scanner->scan(...$rules)) { $this->scanner->dismiss(); assert($rule instanceof Rule); return $rule; @@ -194,7 +194,7 @@ public function expectOneOf(Rule ...$rules): Rule public function skipSpace(): void { - while ($this->scanner->scanOneOf(...self::RULES_SPACE)) { + while ($this->scanner->scan(...self::RULES_SPACE)) { $this->scanner->commit(); } @@ -207,7 +207,7 @@ public function skipSpace(): void public function skipSpaceAndComments(): void { - while ($this->scanner->scanOneOf(...self::RULES_SPACE_AND_COMMENTS)) { + while ($this->scanner->scan(...self::RULES_SPACE_AND_COMMENTS)) { $this->scanner->commit(); } diff --git a/src/Language/Lexer/Scanner/Scanner.php b/src/Language/Lexer/Scanner/Scanner.php index b240461..3aebe19 100644 --- a/src/Language/Lexer/Scanner/Scanner.php +++ b/src/Language/Lexer/Scanner/Scanner.php @@ -74,36 +74,7 @@ public function assertIsEnd(): void } } - public function scan(RuleInterface $rule): bool - { - assert(!$this->isHalted); - - $this->branch->buffer->flush(); - $this->offset = 0; - - $matcher = $rule->getMatcher(); - while (true) { - $character = $this->branch->characterStream->current(); - $result = $matcher->match($character, $this->offset); - - if ($result === 
Result::SATISFIED) { - $this->isHalted = true; - return true; - } - - if ($result === Result::CANCEL) { - $this->branch->buffer->append($character); - $this->isHalted = true; - return false; - } - - $this->offset++; - $this->branch->buffer->append($character); - $this->branch->characterStream->next(); - } - } - - public function scanOneOf(RuleInterface ...$rules): ?RuleInterface + public function scan(RuleInterface ...$rules): ?RuleInterface { assert(!$this->isHalted); diff --git a/src/Language/Lexer/Scanner/ScannerInterface.php b/src/Language/Lexer/Scanner/ScannerInterface.php index bb4dc49..0ea08d3 100644 --- a/src/Language/Lexer/Scanner/ScannerInterface.php +++ b/src/Language/Lexer/Scanner/ScannerInterface.php @@ -30,8 +30,7 @@ interface ScannerInterface public function getBuffer(): Buffer; public function isEnd(): bool; - public function scan(RuleInterface $rule): bool; - public function scanOneOf(RuleInterface ...$rules): ?RuleInterface; + public function scan(RuleInterface ...$rules): ?RuleInterface; public function commit(): void; public function dismiss(): void; } diff --git a/test/Unit/Language/Lexer/Scanner/ScannerTest.php b/test/Unit/Language/Lexer/Scanner/ScannerTest.php index 59d722a..e038a02 100644 --- a/test/Unit/Language/Lexer/Scanner/ScannerTest.php +++ b/test/Unit/Language/Lexer/Scanner/ScannerTest.php @@ -64,14 +64,14 @@ public function testInitialScannerStateWhenSourceIsNotEmpty(): void /** * @test */ - public function scanReturnsTrueAndCapturesMatchingCharactersIfGivenRuleMatches(): void + public function scanReturnsGivenRuleAndCapturesMatchingCharactersIfGivenRuleMatches(): void { $scanner = new Scanner('ABC'); $rule = RuleFixtures::withMatcher( MatcherFixtures::everything() ); - $this->assertTrue($scanner->scan($rule)); + $this->assertSame($rule, $scanner->scan($rule)); $this->assertScannerState( expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 2), @@ -91,7 +91,7 @@ public function 
scanCapturesEveryCharacterUntilMatchWasFound(): void MatcherFixtures::satisfiedAtOffset(1) ); - $this->assertTrue($scanner->scan($rule)); + $this->assertSame($rule, $scanner->scan($rule)); $this->assertScannerState( expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 0), @@ -102,7 +102,7 @@ public function scanCapturesEveryCharacterUntilMatchWasFound(): void $scanner->commit(); - $this->assertTrue($scanner->scan($rule)); + $this->assertSame($rule, $scanner->scan($rule)); $this->assertScannerState( expectedBufferStart: Position::from(0, 1), expectedBufferEnd: Position::from(0, 1), @@ -113,7 +113,7 @@ public function scanCapturesEveryCharacterUntilMatchWasFound(): void $scanner->commit(); - $this->assertTrue($scanner->scan($rule)); + $this->assertSame($rule, $scanner->scan($rule)); $this->assertScannerState( expectedBufferStart: Position::from(0, 2), expectedBufferEnd: Position::from(0, 2), @@ -126,14 +126,14 @@ public function scanCapturesEveryCharacterUntilMatchWasFound(): void /** * @test */ - public function scanReturnsFalseButCapturesAllMatchingCharactersUntilFailureIfGivenRuleDoesNotMatch(): void + public function scanReturnsNullAndCapturesAllMatchingCharactersUntilFailureIfGivenRuleDoesNotMatch(): void { $scanner = new Scanner('AABBCC'); $rule = RuleFixtures::withMatcher( MatcherFixtures::cancelAtOffset(3) ); - $this->assertFalse($scanner->scan($rule)); + $this->assertNull($scanner->scan($rule)); $this->assertScannerState( expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 3), @@ -162,7 +162,7 @@ public function scanCannotContinueOnceHalted(): void /** * @test */ - public function scanReturnsTrueAndCapturesMatchingCharactersIfGivenRuleDoesNotMatchButTheNextRuleDoes(): void + public function scanCapturesMatchingCharactersIfGivenRuleDoesNotMatchButTheNextRuleDoes(): void { $scanner = new Scanner('ABC'); $notMatchingRule = RuleFixtures::withMatcher( @@ -188,7 +188,7 @@ public function 
scanReturnsTrueAndCapturesMatchingCharactersIfGivenRuleDoesNotMa /** * @test */ - public function scanOneOfCapturesMatchingCharactersAndReturnsTheMatchingRuleIfAnyOfTheGivenRulesMatch(): void + public function scanCapturesMatchingCharactersAndReturnsTheMatchingRuleIfAnyOfTheGivenRulesMatch(): void { $scanner = new Scanner('ABC'); $notMatchingRule1 = RuleFixtures::withMatcher( @@ -203,7 +203,7 @@ public function scanOneOfCapturesMatchingCharactersAndReturnsTheMatchingRuleIfAn $this->assertSame( $matchingRule, - $scanner->scanOneOf($notMatchingRule1, $matchingRule, $notMatchingRule2) + $scanner->scan($notMatchingRule1, $matchingRule, $notMatchingRule2) ); $this->assertScannerState( expectedBufferStart: Position::from(0, 0), @@ -217,7 +217,7 @@ public function scanOneOfCapturesMatchingCharactersAndReturnsTheMatchingRuleIfAn /** * @test */ - public function scanOneOfReturnsNullButCapturesAllMatchingCharactersUntilFailureIfNoneOfTheGivenRulesMatch(): void + public function scanReturnsNullAndCapturesAllMatchingCharactersUntilFailureIfNoneOfTheGivenRulesMatch(): void { // // Non-Match first @@ -235,7 +235,7 @@ public function scanOneOfReturnsNullButCapturesAllMatchingCharactersUntilFailure ); $this->assertNull( - $scanner->scanOneOf($notMatchingRule1, $notMatchingRule2, $notMatchingRule3) + $scanner->scan($notMatchingRule1, $notMatchingRule2, $notMatchingRule3) ); $this->assertScannerState( expectedBufferStart: Position::from(0, 0), @@ -260,9 +260,9 @@ public function scanOneOfReturnsNullButCapturesAllMatchingCharactersUntilFailure MatcherFixtures::satisfiedAtOffset(3) ); - $scanner->scanOneOf($notMatchingRule1, $notMatchingRule2, $matchingRule); + $scanner->scan($notMatchingRule1, $notMatchingRule2, $matchingRule); $scanner->commit(); - $scanner->scanOneOf($notMatchingRule1, $notMatchingRule2); + $scanner->scan($notMatchingRule1, $notMatchingRule2); $this->assertScannerState( expectedBufferStart: Position::from(0, 3), @@ -276,7 +276,7 @@ public function 
scanOneOfReturnsNullButCapturesAllMatchingCharactersUntilFailure /** * @test */ - public function scanOneOfIfTwoCompetingRulesBothMatchAtTheSameOffsetTheFirstOneThatMatchesWins(): void + public function scanIfTwoCompetingRulesBothMatchAtTheSameOffsetTheFirstOneThatMatchesWins(): void { $scanner = new Scanner('ABC'); $matchingRule1 = RuleFixtures::withMatcher( @@ -291,7 +291,7 @@ public function scanOneOfIfTwoCompetingRulesBothMatchAtTheSameOffsetTheFirstOneT $this->assertSame( $matchingRule1, - $scanner->scanOneOf($matchingRule1, $matchingRule2, $notMatchingRule) + $scanner->scan($matchingRule1, $matchingRule2, $notMatchingRule) ); $this->assertScannerState( expectedBufferStart: Position::from(0, 0), @@ -305,7 +305,7 @@ public function scanOneOfIfTwoCompetingRulesBothMatchAtTheSameOffsetTheFirstOneT /** * @test */ - public function scanOneOfIfTwoCompetingRulesBothMatchAtDifferentOffsetsTheFirstOneThatMatchesWins(): void + public function scanIfTwoCompetingRulesBothMatchAtDifferentOffsetsTheFirstOneThatMatchesWins(): void { $scanner = new Scanner('ABC'); $matchingRule1 = RuleFixtures::withMatcher( @@ -320,7 +320,7 @@ public function scanOneOfIfTwoCompetingRulesBothMatchAtDifferentOffsetsTheFirstO $this->assertSame( $matchingRule2, - $scanner->scanOneOf($matchingRule1, $matchingRule2, $notMatchingRule) + $scanner->scan($matchingRule1, $matchingRule2, $notMatchingRule) ); $this->assertScannerState( expectedBufferStart: Position::from(0, 0), @@ -334,7 +334,7 @@ public function scanOneOfIfTwoCompetingRulesBothMatchAtDifferentOffsetsTheFirstO /** * @test */ - public function scanOneOfCannotContinueOnceScannerIsHalted(): void + public function scanCannotContinueOnceScannerIsHalted(): void { $scanner = new Scanner('ABC'); $rule1 = RuleFixtures::withMatcher( @@ -344,10 +344,10 @@ public function scanOneOfCannotContinueOnceScannerIsHalted(): void MatcherFixtures::nothing() ); - $scanner->scanOneOf($rule1, $rule2); + $scanner->scan($rule1, $rule2); 
$this->expectException(AssertionError::class); - $scanner->scanOneOf($rule1, $rule2); + $scanner->scan($rule1, $rule2); } /** @@ -436,9 +436,9 @@ public function backspaceReturnsToLastPositionAfterScanOneOfMatch(): void MatcherFixtures::satisfiedAtOffset(4) ); - $scanner->scanOneOf($rule1, $rule2, $rule3); + $scanner->scan($rule1, $rule2, $rule3); $scanner->commit(); - $scanner->scanOneOf($rule1, $rule2, $rule3); + $scanner->scan($rule1, $rule2, $rule3); $this->assertScannerState( expectedBufferStart: Position::from(0, 2), @@ -475,9 +475,9 @@ public function backspaceReturnsToLastPositionAfterScanOneOfMismatch(): void MatcherFixtures::cancelAtOffset(3) ); - $scanner->scanOneOf($rule1, $rule2, $rule3); + $scanner->scan($rule1, $rule2, $rule3); $scanner->commit(); - $scanner->scanOneOf($rule2, $rule3); + $scanner->scan($rule2, $rule3); $this->assertScannerState( expectedBufferStart: Position::from(0, 2), From f7e3382e3a93c8107aaefe2b85a0c9562a73f81f Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 18 Aug 2023 15:26:34 +0200 Subject: [PATCH 15/19] TASK: Replace all `*OneOf` methods of Lexer class --- src/Language/Lexer/Lexer.php | 68 ++----------------- .../BooleanLiteral/BooleanLiteralParser.php | 3 +- .../ComponentDeclarationParser.php | 3 +- .../EnumDeclaration/EnumDeclarationParser.php | 3 +- src/Language/Parser/Export/ExportParser.php | 3 +- .../Parser/Expression/ExpressionParser.php | 13 ++-- .../IntegerLiteral/IntegerLiteralParser.php | 2 +- src/Language/Parser/Tag/TagParser.php | 3 +- src/Language/Parser/Text/TextParser.php | 6 +- .../TypeReference/TypeReferenceParser.php | 2 +- test/Unit/Language/Lexer/LexerTest.php | 36 +++------- .../Parser/Export/ExportParserTest.php | 1 - .../IntegerLiteralParserTest.php | 1 - 13 files changed, 28 insertions(+), 116 deletions(-) diff --git a/src/Language/Lexer/Lexer.php b/src/Language/Lexer/Lexer.php index d2eb68f..7f4f15f 100644 --- a/src/Language/Lexer/Lexer.php +++ b/src/Language/Lexer/Lexer.php @@ -63,29 
+63,8 @@ public function assertIsEnd(): void } } - public function read(Rule $rule): void - { - if ($this->scanner->scan($rule)) { - $this->scanner->commit(); - return; - } - - if ($this->scanner->isEnd()) { - throw LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: [$rule], - affectedRangeInSource: $this->scanner->getBuffer()->getRange() - ); - } - - throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: [$rule], - affectedRangeInSource: $this->scanner->getBuffer()->getRange(), - actualCharacterSequence: $this->scanner->getBuffer()->getContents() - ); - } - /** @phpstan-impure */ - public function readOneOf(Rule ...$rules): Rule + public function read(Rule ...$rules): Rule { if ($rule = $this->scanner->scan(...$rules)) { $this->scanner->commit(); @@ -107,19 +86,8 @@ public function readOneOf(Rule ...$rules): Rule ); } - public function probe(Rule $rule): bool - { - if ($this->scanner->scan($rule)) { - $this->scanner->commit(); - return true; - } - - $this->scanner->dismiss(); - return false; - } - /** @phpstan-impure */ - public function probeOneOf(Rule ...$rules): ?Rule + public function probe(Rule ...$rules): ?Rule { if ($rule = $this->scanner->scan(...$rules)) { $this->scanner->commit(); @@ -131,16 +99,8 @@ public function probeOneOf(Rule ...$rules): ?Rule return null; } - public function peek(Rule $rule): bool - { - $result = $this->scanner->scan($rule); - $this->scanner->dismiss(); - - return (bool) $result; - } - /** @phpstan-impure */ - public function peekOneOf(Rule ...$rules): ?Rule + public function peek(Rule ...$rules): ?Rule { $rule = $this->scanner->scan(...$rules); $this->scanner->dismiss(); @@ -149,28 +109,8 @@ public function peekOneOf(Rule ...$rules): ?Rule return $rule; } - public function expect(Rule $rule): void - { - if ($this->scanner->isEnd()) { - throw LexerException::becauseOfUnexpectedEndOfSource( - expectedRules: [$rule], - affectedRangeInSource: $this->scanner->getBuffer()->getRange() - ); - } - - if 
(!$this->scanner->scan($rule)) { - throw LexerException::becauseOfUnexpectedCharacterSequence( - expectedRules: [$rule], - affectedRangeInSource: $this->scanner->getBuffer()->getRange(), - actualCharacterSequence: $this->scanner->getBuffer()->getContents() - ); - } - - $this->scanner->dismiss(); - } - /** @phpstan-impure */ - public function expectOneOf(Rule ...$rules): Rule + public function expect(Rule ...$rules): Rule { if ($this->scanner->isEnd()) { throw LexerException::becauseOfUnexpectedEndOfSource( diff --git a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php index bed1053..74021ab 100644 --- a/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php +++ b/src/Language/Parser/BooleanLiteral/BooleanLiteralParser.php @@ -26,7 +26,6 @@ use PackageFactory\ComponentEngine\Language\AST\Node\BooleanLiteral\BooleanLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; final class BooleanLiteralParser { @@ -39,7 +38,7 @@ final class BooleanLiteralParser public function parse(Lexer $lexer): BooleanLiteralNode { - $rule = $lexer->readOneOf(...self::RULES_BOOLEAN_KEYWORDS); + $rule = $lexer->read(...self::RULES_BOOLEAN_KEYWORDS); return new BooleanLiteralNode( rangeInSource: $lexer->buffer->getRange(), diff --git a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php index 13a615a..bcec23d 100644 --- a/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php +++ b/src/Language/Parser/ComponentDeclaration/ComponentDeclarationParser.php @@ -30,7 +30,6 @@ use PackageFactory\ComponentEngine\Language\AST\Node\PropertyDeclaration\PropertyDeclarationNodes; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use 
PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Language\Parser\PropertyDeclaration\PropertyDeclarationParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -103,7 +102,7 @@ private function parseReturn(Lexer $lexer): ExpressionNode $this->returnParser ??= new ExpressionParser(); $lexer->read(Rule::KEYWORD_RETURN); - $lexer->readOneOf(...self::RULES_SPACE); + $lexer->read(...self::RULES_SPACE); $lexer->skipSpaceAndComments(); return $this->returnParser->parse($lexer); diff --git a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php index 97ccbae..fd9f04d 100644 --- a/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php +++ b/src/Language/Parser/EnumDeclaration/EnumDeclarationParser.php @@ -35,7 +35,6 @@ use PackageFactory\ComponentEngine\Language\AST\Node\StringLiteral\StringLiteralNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Parser\Source\Range; @@ -133,7 +132,7 @@ private function parseEnumMemberValue(Lexer $lexer): ?EnumMemberValueNode if ($lexer->probe(Rule::BRACKET_ROUND_OPEN)) { $start = $lexer->buffer->getStart(); - $value = match ($lexer->expectOneOf(...self::RULES_ENUM_MEMBER_VALUE_START)) { + $value = match ($lexer->expect(...self::RULES_ENUM_MEMBER_VALUE_START)) { Rule::STRING_LITERAL_DELIMITER => $this->parseStringLiteral($lexer), default => diff --git a/src/Language/Parser/Export/ExportParser.php 
b/src/Language/Parser/Export/ExportParser.php index b7b788a..2797d30 100644 --- a/src/Language/Parser/Export/ExportParser.php +++ b/src/Language/Parser/Export/ExportParser.php @@ -31,7 +31,6 @@ use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\ComponentDeclaration\ComponentDeclarationParser; use PackageFactory\ComponentEngine\Language\Parser\EnumDeclaration\EnumDeclarationParser; use PackageFactory\ComponentEngine\Language\Parser\StructDeclaration\StructDeclarationParser; @@ -59,7 +58,7 @@ public function parse(Lexer $lexer): ExportNode $lexer->skipSpace(); - $declaration = match ($lexer->expectOneOf(...self::RULES_DECLARATION_KEYWORDS)) { + $declaration = match ($lexer->expect(...self::RULES_DECLARATION_KEYWORDS)) { Rule::KEYWORD_COMPONENT => $this->parseComponentDeclaration($lexer), Rule::KEYWORD_ENUM => $this->parseEnumDeclaration($lexer), Rule::KEYWORD_STRUCT => $this->parseStructDeclaration($lexer), diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index 19323ea..237ceed 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -35,7 +35,6 @@ use PackageFactory\ComponentEngine\Language\AST\Node\UnaryOperation\UnaryOperator; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\BooleanLiteral\BooleanLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\Match\MatchParser; @@ -106,11 +105,11 @@ public function 
parse(Lexer $lexer): ExpressionNode while (!$lexer->isEnd()) { $lexer->skipSpaceAndComments(); - if ($lexer->peekOneOf(...self::RULES_CLOSING_DELIMITERS)) { + if ($lexer->peek(...self::RULES_CLOSING_DELIMITERS)) { return $result; } - if ($lexer->peekOneOf(...self::RULES_ACCESS)) { + if ($lexer->peek(...self::RULES_ACCESS)) { $result = $this->parseAccess($lexer, $result); continue; } @@ -124,7 +123,7 @@ public function parse(Lexer $lexer): ExpressionNode continue; } - if ($rule = $lexer->peekOneOf(...self::RULES_BINARY_OPERATORS)) { + if ($rule = $lexer->peek(...self::RULES_BINARY_OPERATORS)) { assert($rule instanceof Rule); if ($this->precedence->mustStopAt($rule)) { return $result; @@ -145,7 +144,7 @@ private function parseUnaryStatement(Lexer $lexer): ExpressionNode if ($lexer->peek(Rule::TEMPLATE_LITERAL_DELIMITER)) { $result = $this->parseTemplateLiteral($lexer); } else { - $result = match ($lexer->expectOneOf(...self::RULES_UNARY)) { + $result = match ($lexer->expect(...self::RULES_UNARY)) { Rule::SYMBOL_EXCLAMATIONMARK => $this->parseUnaryOperation($lexer), Rule::KEYWORD_TRUE, @@ -356,7 +355,7 @@ private function parseAccess(Lexer $lexer, ExpressionNode $parent): ExpressionNo private function parseAccessType(Lexer $lexer): ?AccessType { - return match ($lexer->probeOneOf(...self::RULES_ACCESS)) { + return match ($lexer->probe(...self::RULES_ACCESS)) { Rule::SYMBOL_PERIOD => AccessType::MANDATORY, Rule::SYMBOL_OPTCHAIN => AccessType::OPTIONAL, default => null @@ -397,7 +396,7 @@ private function parseBinaryOperator(Lexer $lexer): BinaryOperator return BinaryOperator::LESS_THAN_OR_EQUAL; } - $operator = match ($lexer->readOneOf(...self::RULES_BINARY_OPERATORS)) { + $operator = match ($lexer->read(...self::RULES_BINARY_OPERATORS)) { Rule::SYMBOL_NULLISH_COALESCE => BinaryOperator::NULLISH_COALESCE, Rule::SYMBOL_BOOLEAN_AND => BinaryOperator::AND, Rule::SYMBOL_BOOLEAN_OR => BinaryOperator::OR, diff --git 
a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index 6eb5a64..583d2f3 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -43,7 +43,7 @@ final class IntegerLiteralParser public function parse(Lexer $lexer): IntegerLiteralNode { try { - $rule = $lexer->readOneOf(...self::RULES_INTEGER_FORMATS); + $rule = $lexer->read(...self::RULES_INTEGER_FORMATS); return new IntegerLiteralNode( rangeInSource: $lexer->buffer->getRange(), diff --git a/src/Language/Parser/Tag/TagParser.php b/src/Language/Parser/Tag/TagParser.php index cbbf649..3f58a31 100644 --- a/src/Language/Parser/Tag/TagParser.php +++ b/src/Language/Parser/Tag/TagParser.php @@ -37,7 +37,6 @@ use PackageFactory\ComponentEngine\Language\AST\Node\Text\TextNode; use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\Expression\ExpressionParser; use PackageFactory\ComponentEngine\Language\Parser\StringLiteral\StringLiteralParser; use PackageFactory\ComponentEngine\Language\Parser\Text\TextParser; @@ -148,7 +147,7 @@ private function parseAttributeName(Lexer $lexer): AttributeNameNode private function parseAttributeValue(Lexer $lexer): null|StringLiteralNode|ExpressionNode { if ($lexer->probe(Rule::SYMBOL_EQUALS)) { - return match ($lexer->expectOneOf(...self::RULES_ATTRIBUTE_DELIMITERS)) { + return match ($lexer->expect(...self::RULES_ATTRIBUTE_DELIMITERS)) { Rule::STRING_LITERAL_DELIMITER => $this->parseString($lexer), Rule::BRACKET_CURLY_OPEN => diff --git a/src/Language/Parser/Text/TextParser.php b/src/Language/Parser/Text/TextParser.php index b2b1c8d..0aad8ca 100644 --- a/src/Language/Parser/Text/TextParser.php +++ b/src/Language/Parser/Text/TextParser.php @@ -61,15 +61,15 @@ 
public function parse(Lexer $lexer, bool $preserveLeadingSpace = false): ?TextNo } $lexer->skipSpace(); - if ($lexer->isEnd() || $lexer->peekOneOf(...self::RULES_END_DELIMITERS)) { + if ($lexer->isEnd() || $lexer->peek(...self::RULES_END_DELIMITERS)) { return null; } $hasTrailingSpace = false; $trailingSpaceContainsLineBreaks = false; $value = $hasLeadingSpace && $preserveLeadingSpace ? ' ' : ''; - while (!$lexer->isEnd() && !$lexer->peekOneOf(...self::RULES_END_DELIMITERS)) { - $rule = $lexer->readOneOf(...self::RULES_CONTENT); + while (!$lexer->isEnd() && !$lexer->peek(...self::RULES_END_DELIMITERS)) { + $rule = $lexer->read(...self::RULES_CONTENT); if ($rule === Rule::TEXT) { $start ??= $lexer->buffer->getStart(); diff --git a/src/Language/Parser/TypeReference/TypeReferenceParser.php b/src/Language/Parser/TypeReference/TypeReferenceParser.php index d23b256..7801381 100644 --- a/src/Language/Parser/TypeReference/TypeReferenceParser.php +++ b/src/Language/Parser/TypeReference/TypeReferenceParser.php @@ -43,7 +43,7 @@ final class TypeReferenceParser public function parse(Lexer $lexer): TypeReferenceNode { $this->start = null; - if ($isOptional = $lexer->probe(Rule::SYMBOL_QUESTIONMARK)) { + if ($isOptional = (bool) $lexer->probe(Rule::SYMBOL_QUESTIONMARK)) { $this->start = $lexer->buffer->getStart(); } $typeNameNodes = $this->parseTypeNames($lexer); diff --git a/test/Unit/Language/Lexer/LexerTest.php b/test/Unit/Language/Lexer/LexerTest.php index 0daeb72..71737ab 100644 --- a/test/Unit/Language/Lexer/LexerTest.php +++ b/test/Unit/Language/Lexer/LexerTest.php @@ -250,26 +250,6 @@ public function readSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rule $ex ); } - /** - * @dataProvider singleTokenExamples - * @test - * @param string $source - * @param Rule $expectedRule - * @return void - */ - public function readOneOfSavesTokenOfGivenTypeIfMatchIsFound(string $source, Rule $expectedRule): void - { - $this->lexer = new Lexer($source); - 
$this->lexer->readOneOf($expectedRule); - - $this->assertLexerState( - startPosition: Position::from(0, 0), - endPosition: Position::from(0, \mb_strlen($source) - 1), - buffer: $source, - isEnd: true - ); - } - /** * @return iterable */ @@ -504,7 +484,7 @@ public function testReadOneOfWithMultipleRules( $this->lexer = new Lexer($source); foreach ($expectedLexerStates as $i => $expectedLexerState) { - $this->lexer->readOneOf(...$rules); + $this->lexer->read(...$rules); $this->assertLexerState( startPosition: Position::from(...$expectedLexerState[0]), @@ -708,7 +688,7 @@ function () use ($source, $rules, $numberOfReadOperations) { $this->lexer = new Lexer($source); foreach(range(0, $numberOfReadOperations) as $i) { - $this->lexer->readOneOf(...$rules); + $this->lexer->read(...$rules); } }, $expectedLexerException @@ -827,7 +807,7 @@ function () use ($source, $rules, $numberOfReadOperations) { $this->lexer = new Lexer($source); foreach(range(0, $numberOfReadOperations) as $i) { - $this->lexer->readOneOf(...$rules); + $this->lexer->read(...$rules); } }, $expectedLexerException @@ -856,9 +836,9 @@ public function skipsSpace(): void // Multiple $this->lexer = new Lexer('return ' . "\t\n\t" . 
' 42'); - $this->lexer->readOneOf(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL); + $this->lexer->read(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL); $this->lexer->skipSpace(); - $this->lexer->readOneOf(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL); + $this->lexer->read(Rule::KEYWORD_RETURN, Rule::INTEGER_DECIMAL); $this->assertLexerState( startPosition: Position::from(1, 4), @@ -901,19 +881,19 @@ public function skipsSpaceAndComments(): void // Multiple $this->lexer = new Lexer($source); - $this->lexer->readOneOf( + $this->lexer->read( Rule::KEYWORD_IMPORT, Rule::KEYWORD_EXPORT, Rule::KEYWORD_COMPONENT ); $this->lexer->skipSpaceAndComments(); - $this->lexer->readOneOf( + $this->lexer->read( Rule::KEYWORD_IMPORT, Rule::KEYWORD_EXPORT, Rule::KEYWORD_COMPONENT ); $this->lexer->skipSpaceAndComments(); - $this->lexer->readOneOf( + $this->lexer->read( Rule::KEYWORD_IMPORT, Rule::KEYWORD_EXPORT, Rule::KEYWORD_COMPONENT diff --git a/test/Unit/Language/Parser/Export/ExportParserTest.php b/test/Unit/Language/Parser/Export/ExportParserTest.php index d9fb1ef..839b0c7 100644 --- a/test/Unit/Language/Parser/Export/ExportParserTest.php +++ b/test/Unit/Language/Parser/Export/ExportParserTest.php @@ -50,7 +50,6 @@ use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\Export\ExportCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\Export\ExportParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; diff --git a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php index 583873d..99f4a8e 100644 --- a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php +++ 
b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php @@ -27,7 +27,6 @@ use PackageFactory\ComponentEngine\Language\Lexer\Lexer; use PackageFactory\ComponentEngine\Language\Lexer\LexerException; use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rules; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralCouldNotBeParsed; use PackageFactory\ComponentEngine\Language\Parser\IntegerLiteral\IntegerLiteralParser; use PackageFactory\ComponentEngine\Test\Unit\Language\Parser\ParserTestCase; From 9b1d34d5a0351a67ecc2ab60f59838d435b21fc1 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 18 Aug 2023 15:51:21 +0200 Subject: [PATCH 16/19] TASK: Turn abstract Matcher class into interface ...and move the (rule -> matcher) cache concern over to the Scanner class. --- .../Lexer/Matcher/Characters/Characters.php | 4 +- src/Language/Lexer/Matcher/Exact/Exact.php | 4 +- src/Language/Lexer/Matcher/Fixed/Fixed.php | 6 +- src/Language/Lexer/Matcher/Matcher.php | 190 ------------------ .../Lexer/Matcher/MatcherInterface.php | 28 +++ src/Language/Lexer/Matcher/Not/Not.php | 6 +- .../Lexer/Matcher/Optional/Optional.php | 6 +- .../Lexer/Matcher/Sequence/Sequence.php | 8 +- src/Language/Lexer/Rule/Rule.php | 156 +++++++++++++- src/Language/Lexer/Rule/RuleInterface.php | 4 +- src/Language/Lexer/Scanner/Scanner.php | 7 +- .../Lexer/Matcher/MatcherFixtures.php | 18 +- .../Unit/Language/Lexer/Rule/RuleFixtures.php | 8 +- 13 files changed, 219 insertions(+), 226 deletions(-) delete mode 100644 src/Language/Lexer/Matcher/Matcher.php create mode 100644 src/Language/Lexer/Matcher/MatcherInterface.php diff --git a/src/Language/Lexer/Matcher/Characters/Characters.php b/src/Language/Lexer/Matcher/Characters/Characters.php index 4a6059b..1fb4efd 100644 --- a/src/Language/Lexer/Matcher/Characters/Characters.php +++ b/src/Language/Lexer/Matcher/Characters/Characters.php @@ -23,9 +23,9 @@ 
namespace PackageFactory\ComponentEngine\Language\Lexer\Matcher\Characters; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; -final class Characters extends Matcher +final class Characters implements MatcherInterface { public function __construct( private readonly string $allowedCharacters, diff --git a/src/Language/Lexer/Matcher/Exact/Exact.php b/src/Language/Lexer/Matcher/Exact/Exact.php index bcb3d6f..f6d3c48 100644 --- a/src/Language/Lexer/Matcher/Exact/Exact.php +++ b/src/Language/Lexer/Matcher/Exact/Exact.php @@ -23,9 +23,9 @@ namespace PackageFactory\ComponentEngine\Language\Lexer\Matcher\Exact; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; -final class Exact extends Matcher +final class Exact implements MatcherInterface { private int $length; diff --git a/src/Language/Lexer/Matcher/Fixed/Fixed.php b/src/Language/Lexer/Matcher/Fixed/Fixed.php index 4106fe6..61c1a14 100644 --- a/src/Language/Lexer/Matcher/Fixed/Fixed.php +++ b/src/Language/Lexer/Matcher/Fixed/Fixed.php @@ -23,13 +23,13 @@ namespace PackageFactory\ComponentEngine\Language\Lexer\Matcher\Fixed; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; -final class Fixed extends Matcher +final class Fixed implements MatcherInterface { public function __construct( private readonly int $fixedLength, - private readonly Matcher $innerMatcher + private readonly MatcherInterface $innerMatcher ) { assert($this->fixedLength > 0); } diff --git a/src/Language/Lexer/Matcher/Matcher.php b/src/Language/Lexer/Matcher/Matcher.php deleted 
file mode 100644 index f971da9..0000000 --- a/src/Language/Lexer/Matcher/Matcher.php +++ /dev/null @@ -1,190 +0,0 @@ -. - */ - -declare(strict_types=1); - -namespace PackageFactory\ComponentEngine\Language\Lexer\Matcher; - -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Characters\Characters; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Exact\Exact; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Fixed\Fixed; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Not\Not; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Optional\Optional; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Sequence\Sequence; -use PackageFactory\ComponentEngine\Language\Lexer\Rule\Rule; - -abstract class Matcher -{ - /** - * @var array - */ - private static $instancesByRule = []; - - final public static function for(Rule $rule): self - { - return self::$instancesByRule[$rule->value] ??= match ($rule) { - Rule::COMMENT => - new Sequence( - new Exact('#'), - new Optional(new Not(new Exact("\n"))) - ), - - Rule::KEYWORD_FROM => - new Exact('from'), - Rule::KEYWORD_IMPORT => - new Exact('import'), - Rule::KEYWORD_EXPORT => - new Exact('export'), - Rule::KEYWORD_ENUM => - new Exact('enum'), - Rule::KEYWORD_STRUCT => - new Exact('struct'), - Rule::KEYWORD_COMPONENT => - new Exact('component'), - Rule::KEYWORD_MATCH => - new Exact('match'), - Rule::KEYWORD_DEFAULT => - new Exact('default'), - Rule::KEYWORD_RETURN => - new Exact('return'), - Rule::KEYWORD_TRUE => - new Exact('true'), - Rule::KEYWORD_FALSE => - new Exact('false'), - Rule::KEYWORD_NULL => - new Exact('null'), - - Rule::STRING_LITERAL_DELIMITER => - new Exact('"'), - Rule::STRING_LITERAL_CONTENT => - new Not(new Characters('"\\')), - - Rule::INTEGER_BINARY => - new Sequence(new Exact('0b'), new Characters('01')), - Rule::INTEGER_OCTAL => - new Sequence(new Exact('0o'), new Characters('01234567')), - Rule::INTEGER_DECIMAL => - new Characters('0123456789', 'box'), - 
Rule::INTEGER_HEXADECIMAL => - new Sequence(new Exact('0x'), new Characters('0123456789ABCDEF')), - - Rule::TEMPLATE_LITERAL_DELIMITER => - new Exact('"""'), - Rule::TEMPLATE_LITERAL_CONTENT => - new Not(new Characters('{}\\' . "\n")), - - Rule::ESCAPE_SEQUENCE_SINGLE_CHARACTER => - new Sequence( - new Exact('\\'), - new Fixed(1, new Characters('nrtvef\\$"')) - ), - Rule::ESCAPE_SEQUENCE_HEXADECIMAL => - new Sequence( - new Exact('\\x'), - new Fixed(2, new Characters('abcdefABCDEF0123456789')) - ), - Rule::ESCAPE_SEQUENCE_UNICODE => - new Sequence( - new Exact('\\u'), - new Fixed(4, new Characters('abcdefABCDEF0123456789')) - ), - Rule::ESCAPE_SEQUENCE_UNICODE_CODEPOINT => - new Sequence( - new Exact('\\u{'), - new Characters('abcdefABCDEF0123456789'), - new Exact('}') - ), - - Rule::BRACKET_CURLY_OPEN => - new Exact('{'), - Rule::BRACKET_CURLY_CLOSE => - new Exact('}'), - Rule::BRACKET_ROUND_OPEN => - new Exact('('), - Rule::BRACKET_ROUND_CLOSE => - new Exact(')'), - Rule::BRACKET_SQUARE_OPEN => - new Exact('['), - Rule::BRACKET_SQUARE_CLOSE => - new Exact(']'), - Rule::BRACKET_ANGLE_OPEN => - new Exact('<'), - Rule::BRACKET_ANGLE_CLOSE => - new Exact('>'), - - Rule::SYMBOL_COLON => - new Exact(':'), - Rule::SYMBOL_PERIOD => - new Exact('.'), - Rule::SYMBOL_QUESTIONMARK => - new Exact('?'), - Rule::SYMBOL_EXCLAMATIONMARK => - new Exact('!'), - Rule::SYMBOL_COMMA => - new Exact(','), - Rule::SYMBOL_DASH => - new Exact('-'), - Rule::SYMBOL_EQUALS => - new Exact('='), - Rule::SYMBOL_SLASH_FORWARD => - new Exact('/'), - Rule::SYMBOL_PIPE => - new Exact('|'), - Rule::SYMBOL_BOOLEAN_AND => - new Exact('&&'), - Rule::SYMBOL_BOOLEAN_OR => - new Exact('||'), - Rule::SYMBOL_STRICT_EQUALS => - new Exact('==='), - Rule::SYMBOL_NOT_EQUALS => - new Exact('!=='), - Rule::SYMBOL_GREATER_THAN => - new Exact('>'), - Rule::SYMBOL_GREATER_THAN_OR_EQUAL => - new Exact('>='), - Rule::SYMBOL_LESS_THAN => - new Exact('<'), - Rule::SYMBOL_LESS_THAN_OR_EQUAL => - new Exact('<='), - 
Rule::SYMBOL_ARROW_SINGLE => - new Exact('->'), - Rule::SYMBOL_OPTCHAIN => - new Exact('?.'), - Rule::SYMBOL_NULLISH_COALESCE => - new Exact('??'), - Rule::SYMBOL_CLOSE_TAG => - new Exact('</'), - - Rule::WORD => - new Characters( - 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' - ), - Rule::TEXT => - new Not(new Characters('<{}>' . " \t\n")), - - Rule::SPACE => - new Characters(" \t"), - Rule::END_OF_LINE => - new Exact("\n") - }; - } - - abstract public function match(?string $character, int $offset): Result; -} diff --git a/src/Language/Lexer/Matcher/MatcherInterface.php b/src/Language/Lexer/Matcher/MatcherInterface.php new file mode 100644 index 0000000..f490190 --- /dev/null +++ b/src/Language/Lexer/Matcher/MatcherInterface.php @@ -0,0 +1,28 @@ +. + */ + +declare(strict_types=1); + +namespace PackageFactory\ComponentEngine\Language\Lexer\Matcher; + +interface MatcherInterface +{ + public function match(?string $character, int $offset): Result; +} diff --git a/src/Language/Lexer/Matcher/Not/Not.php b/src/Language/Lexer/Matcher/Not/Not.php index 206e87f..a5aff23 100644 --- a/src/Language/Lexer/Matcher/Not/Not.php +++ b/src/Language/Lexer/Matcher/Not/Not.php @@ -23,11 +23,11 @@ namespace PackageFactory\ComponentEngine\Language\Lexer\Matcher\Not; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; -final class Not extends Matcher +final class Not implements MatcherInterface { - public function __construct(private readonly Matcher $innerMatcher) + public function __construct(private readonly MatcherInterface $innerMatcher) { } diff --git a/src/Language/Lexer/Matcher/Optional/Optional.php b/src/Language/Lexer/Matcher/Optional/Optional.php index 20de392..a6c1c7c 100644 --- a/src/Language/Lexer/Matcher/Optional/Optional.php +++ b/src/Language/Lexer/Matcher/Optional/Optional.php @@ -23,11 +23,11 @@ namespace
PackageFactory\ComponentEngine\Language\Lexer\Matcher\Optional; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; -final class Optional extends Matcher +final class Optional implements MatcherInterface { - public function __construct(private readonly Matcher $innerMatcher) + public function __construct(private readonly MatcherInterface $innerMatcher) { } diff --git a/src/Language/Lexer/Matcher/Sequence/Sequence.php b/src/Language/Lexer/Matcher/Sequence/Sequence.php index d112794..32482bb 100644 --- a/src/Language/Lexer/Matcher/Sequence/Sequence.php +++ b/src/Language/Lexer/Matcher/Sequence/Sequence.php @@ -23,20 +23,20 @@ namespace PackageFactory\ComponentEngine\Language\Lexer\Matcher\Sequence; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; -final class Sequence extends Matcher +final class Sequence implements MatcherInterface { private int $lastStop = 0; private int $matcherIndex = 0; private int $numberOfMatchers; /** - * @var Matcher[] + * @var MatcherInterface[] */ private readonly array $matchers; - public function __construct(Matcher ...$matchers) + public function __construct(MatcherInterface ...$matchers) { $this->matchers = $matchers; $this->numberOfMatchers = count($matchers); diff --git a/src/Language/Lexer/Rule/Rule.php b/src/Language/Lexer/Rule/Rule.php index 87ee387..a480b95 100644 --- a/src/Language/Lexer/Rule/Rule.php +++ b/src/Language/Lexer/Rule/Rule.php @@ -22,7 +22,13 @@ namespace PackageFactory\ComponentEngine\Language\Lexer\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Characters\Characters; +use 
PackageFactory\ComponentEngine\Language\Lexer\Matcher\Exact\Exact; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Fixed\Fixed; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Not\Not; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Optional\Optional; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Sequence\Sequence; enum Rule: string implements RuleInterface { @@ -94,8 +100,152 @@ enum Rule: string implements RuleInterface case SPACE = 'SPACE'; case END_OF_LINE = 'END_OF_LINE'; - public function getMatcher(): Matcher + public function getMatcher(): MatcherInterface { - return Matcher::for($this); + return match ($this) { + self::COMMENT => + new Sequence( + new Exact('#'), + new Optional(new Not(new Exact("\n"))) + ), + + self::KEYWORD_FROM => + new Exact('from'), + self::KEYWORD_IMPORT => + new Exact('import'), + self::KEYWORD_EXPORT => + new Exact('export'), + self::KEYWORD_ENUM => + new Exact('enum'), + self::KEYWORD_STRUCT => + new Exact('struct'), + self::KEYWORD_COMPONENT => + new Exact('component'), + self::KEYWORD_MATCH => + new Exact('match'), + self::KEYWORD_DEFAULT => + new Exact('default'), + self::KEYWORD_RETURN => + new Exact('return'), + self::KEYWORD_TRUE => + new Exact('true'), + self::KEYWORD_FALSE => + new Exact('false'), + self::KEYWORD_NULL => + new Exact('null'), + + self::STRING_LITERAL_DELIMITER => + new Exact('"'), + self::STRING_LITERAL_CONTENT => + new Not(new Characters('"\\')), + + self::INTEGER_BINARY => + new Sequence(new Exact('0b'), new Characters('01')), + self::INTEGER_OCTAL => + new Sequence(new Exact('0o'), new Characters('01234567')), + self::INTEGER_DECIMAL => + new Characters('0123456789', 'box'), + self::INTEGER_HEXADECIMAL => + new Sequence(new Exact('0x'), new Characters('0123456789ABCDEF')), + + self::TEMPLATE_LITERAL_DELIMITER => + new Exact('"""'), + self::TEMPLATE_LITERAL_CONTENT => + new Not(new 
Characters('{}\\' . "\n")), + + self::ESCAPE_SEQUENCE_SINGLE_CHARACTER => + new Sequence( + new Exact('\\'), + new Fixed(1, new Characters('nrtvef\\$"')) + ), + self::ESCAPE_SEQUENCE_HEXADECIMAL => + new Sequence( + new Exact('\\x'), + new Fixed(2, new Characters('abcdefABCDEF0123456789')) + ), + self::ESCAPE_SEQUENCE_UNICODE => + new Sequence( + new Exact('\\u'), + new Fixed(4, new Characters('abcdefABCDEF0123456789')) + ), + self::ESCAPE_SEQUENCE_UNICODE_CODEPOINT => + new Sequence( + new Exact('\\u{'), + new Characters('abcdefABCDEF0123456789'), + new Exact('}') + ), + + self::BRACKET_CURLY_OPEN => + new Exact('{'), + self::BRACKET_CURLY_CLOSE => + new Exact('}'), + self::BRACKET_ROUND_OPEN => + new Exact('('), + self::BRACKET_ROUND_CLOSE => + new Exact(')'), + self::BRACKET_SQUARE_OPEN => + new Exact('['), + self::BRACKET_SQUARE_CLOSE => + new Exact(']'), + self::BRACKET_ANGLE_OPEN => + new Exact('<'), + self::BRACKET_ANGLE_CLOSE => + new Exact('>'), + + self::SYMBOL_COLON => + new Exact(':'), + self::SYMBOL_PERIOD => + new Exact('.'), + self::SYMBOL_QUESTIONMARK => + new Exact('?'), + self::SYMBOL_EXCLAMATIONMARK => + new Exact('!'), + self::SYMBOL_COMMA => + new Exact(','), + self::SYMBOL_DASH => + new Exact('-'), + self::SYMBOL_EQUALS => + new Exact('='), + self::SYMBOL_SLASH_FORWARD => + new Exact('/'), + self::SYMBOL_PIPE => + new Exact('|'), + self::SYMBOL_BOOLEAN_AND => + new Exact('&&'), + self::SYMBOL_BOOLEAN_OR => + new Exact('||'), + self::SYMBOL_STRICT_EQUALS => + new Exact('==='), + self::SYMBOL_NOT_EQUALS => + new Exact('!=='), + self::SYMBOL_GREATER_THAN => + new Exact('>'), + self::SYMBOL_GREATER_THAN_OR_EQUAL => + new Exact('>='), + self::SYMBOL_LESS_THAN => + new Exact('<'), + self::SYMBOL_LESS_THAN_OR_EQUAL => + new Exact('<='), + self::SYMBOL_ARROW_SINGLE => + new Exact('->'), + self::SYMBOL_OPTCHAIN => + new Exact('?.'), + self::SYMBOL_NULLISH_COALESCE => + new Exact('??'), + self::SYMBOL_CLOSE_TAG => + new Exact('</'), + + self::WORD => + new Characters( +
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' + ), + self::TEXT => + new Not(new Characters('<{}>' . " \t\n")), + + self::SPACE => + new Characters(" \t"), + self::END_OF_LINE => + new Exact("\n") + }; } } diff --git a/src/Language/Lexer/Rule/RuleInterface.php b/src/Language/Lexer/Rule/RuleInterface.php index 3da73c7..401eed2 100644 --- a/src/Language/Lexer/Rule/RuleInterface.php +++ b/src/Language/Lexer/Rule/RuleInterface.php @@ -22,9 +22,9 @@ namespace PackageFactory\ComponentEngine\Language\Lexer\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; interface RuleInterface { - public function getMatcher(): Matcher; + public function getMatcher(): MatcherInterface; } diff --git a/src/Language/Lexer/Scanner/Scanner.php b/src/Language/Lexer/Scanner/Scanner.php index 3aebe19..6b7bbd7 100644 --- a/src/Language/Lexer/Scanner/Scanner.php +++ b/src/Language/Lexer/Scanner/Scanner.php @@ -24,14 +24,18 @@ use PackageFactory\ComponentEngine\Language\Lexer\Buffer\Buffer; use PackageFactory\ComponentEngine\Language\Lexer\CharacterStream\CharacterStream; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; use PackageFactory\ComponentEngine\Language\Lexer\Rule\RuleInterface; +use SplObjectStorage; final class Scanner implements ScannerInterface { private readonly ScannerState $main; private readonly ScannerState $branch; + /** @var SplObjectStorage<RuleInterface, MatcherInterface> */ + private SplObjectStorage $ruleCache; private bool $isHalted; private int $offset; @@ -46,6 +50,7 @@ public function __construct(string $source) buffer: new Buffer() ); + $this->ruleCache = new SplObjectStorage(); $this->isHalted = false; $this->offset = 0; } @@ -87,7 +92,7 @@ public function scan(RuleInterface ...$rules): ?RuleInterface $nextCandidates = []; foreach ($candidates as $candidate) { - $matcher =
$candidate->getMatcher(); + $matcher = $this->ruleCache[$candidate] ??= $candidate->getMatcher(); $result = $matcher->match($character, $this->offset); if ($result === Result::SATISFIED) { diff --git a/test/Unit/Language/Lexer/Matcher/MatcherFixtures.php b/test/Unit/Language/Lexer/Matcher/MatcherFixtures.php index a0090f2..0f538c3 100644 --- a/test/Unit/Language/Lexer/Matcher/MatcherFixtures.php +++ b/test/Unit/Language/Lexer/Matcher/MatcherFixtures.php @@ -22,14 +22,14 @@ namespace PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Matcher; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result; final class MatcherFixtures { - public static function everything(): Matcher + public static function everything(): MatcherInterface { - return new class extends Matcher + return new class implements MatcherInterface { public function match(?string $character, int $offset): Result { @@ -42,9 +42,9 @@ public function match(?string $character, int $offset): Result }; } - public static function nothing(): Matcher + public static function nothing(): MatcherInterface { - return new class extends Matcher + return new class implements MatcherInterface { public function match(?string $character, int $offset): Result { @@ -53,9 +53,9 @@ public function match(?string $character, int $offset): Result }; } - public static function satisfiedAtOffset(int $matchingOffset): Matcher + public static function satisfiedAtOffset(int $matchingOffset): MatcherInterface { - return new class($matchingOffset) extends Matcher + return new class($matchingOffset) implements MatcherInterface { public function __construct(private readonly int $matchingOffset) { @@ -72,9 +72,9 @@ public function match(?string $character, int $offset): Result }; } - public static function cancelAtOffset(int $matchingOffset): Matcher + public static function 
cancelAtOffset(int $matchingOffset): MatcherInterface { - return new class($matchingOffset) extends Matcher + return new class($matchingOffset) implements MatcherInterface { public function __construct(private readonly int $matchingOffset) { diff --git a/test/Unit/Language/Lexer/Rule/RuleFixtures.php b/test/Unit/Language/Lexer/Rule/RuleFixtures.php index c6168e7..1832f70 100644 --- a/test/Unit/Language/Lexer/Rule/RuleFixtures.php +++ b/test/Unit/Language/Lexer/Rule/RuleFixtures.php @@ -22,20 +22,20 @@ namespace PackageFactory\ComponentEngine\Test\Unit\Language\Lexer\Rule; -use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher; +use PackageFactory\ComponentEngine\Language\Lexer\Matcher\MatcherInterface; use PackageFactory\ComponentEngine\Language\Lexer\Rule\RuleInterface; final class RuleFixtures { - public static function withMatcher(Matcher $matcher): RuleInterface + public static function withMatcher(MatcherInterface $matcher): RuleInterface { return new class($matcher) implements RuleInterface { - public function __construct(private readonly Matcher $matcher) + public function __construct(private readonly MatcherInterface $matcher) { } - public function getMatcher(): Matcher + public function getMatcher(): MatcherInterface { return $this->matcher; } From dcbf92f03c9a3ff5854cb648758416c58d5161c1 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 18 Aug 2023 16:17:58 +0200 Subject: [PATCH 17/19] TASK: Add method `getRemainder` to Scanner class --- .../Lexer/CharacterStream/CharacterStream.php | 2 +- src/Language/Lexer/Scanner/Scanner.php | 5 +++ .../Lexer/Scanner/ScannerInterface.php | 1 + .../Language/Lexer/Scanner/ScannerTest.php | 31 +++++++++++++++++-- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/Language/Lexer/CharacterStream/CharacterStream.php b/src/Language/Lexer/CharacterStream/CharacterStream.php index 99af817..0b221e4 100644 --- a/src/Language/Lexer/CharacterStream/CharacterStream.php +++ 
b/src/Language/Lexer/CharacterStream/CharacterStream.php @@ -78,7 +78,7 @@ public function overwrite(CharacterStream $other): void $other->characterUnderCursor = $this->characterUnderCursor; } - public function getRest(): string + public function getRemainder(): string { return $this->characterUnderCursor . substr($this->source, $this->byte); } diff --git a/src/Language/Lexer/Scanner/Scanner.php b/src/Language/Lexer/Scanner/Scanner.php index 6b7bbd7..c49e7f9 100644 --- a/src/Language/Lexer/Scanner/Scanner.php +++ b/src/Language/Lexer/Scanner/Scanner.php @@ -60,6 +60,11 @@ public function getBuffer(): Buffer return $this->branch->buffer; } + public function getRemainder(): string + { + return $this->branch->characterStream->getRemainder(); + } + public function isEnd(): bool { return $this->branch->characterStream->isEnd(); diff --git a/src/Language/Lexer/Scanner/ScannerInterface.php b/src/Language/Lexer/Scanner/ScannerInterface.php index 0ea08d3..2b5974e 100644 --- a/src/Language/Lexer/Scanner/ScannerInterface.php +++ b/src/Language/Lexer/Scanner/ScannerInterface.php @@ -28,6 +28,7 @@ interface ScannerInterface { public function getBuffer(): Buffer; + public function getRemainder(): string; public function isEnd(): bool; public function scan(RuleInterface ...$rules): ?RuleInterface; diff --git a/test/Unit/Language/Lexer/Scanner/ScannerTest.php b/test/Unit/Language/Lexer/Scanner/ScannerTest.php index e038a02..bac75cc 100644 --- a/test/Unit/Language/Lexer/Scanner/ScannerTest.php +++ b/test/Unit/Language/Lexer/Scanner/ScannerTest.php @@ -42,6 +42,7 @@ public function testInitialScannerStateWhenSourceIsEmpty(): void expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 0), expectedBufferContents: '', + expectedRemainder: '', expectedIsEnd: true, actualScanner: new Scanner(''), ); @@ -56,6 +57,7 @@ public function testInitialScannerStateWhenSourceIsNotEmpty(): void expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 
0), expectedBufferContents: '', + expectedRemainder: 'A', expectedIsEnd: false, actualScanner: new Scanner('A'), ); @@ -76,6 +78,7 @@ public function scanReturnsGivenRuleAndCapturesMatchingCharactersIfGivenRuleMatc expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 2), expectedBufferContents: 'ABC', + expectedRemainder: '', expectedIsEnd: true, actualScanner: $scanner, ); @@ -96,6 +99,7 @@ public function scanCapturesEveryCharacterUntilMatchWasFound(): void expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 0), expectedBufferContents: 'A', + expectedRemainder: 'BC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -107,6 +111,7 @@ public function scanCapturesEveryCharacterUntilMatchWasFound(): void expectedBufferStart: Position::from(0, 1), expectedBufferEnd: Position::from(0, 1), expectedBufferContents: 'B', + expectedRemainder: 'C', expectedIsEnd: false, actualScanner: $scanner, ); @@ -118,6 +123,7 @@ public function scanCapturesEveryCharacterUntilMatchWasFound(): void expectedBufferStart: Position::from(0, 2), expectedBufferEnd: Position::from(0, 2), expectedBufferContents: 'C', + expectedRemainder: '', expectedIsEnd: true, actualScanner: $scanner, ); @@ -138,6 +144,7 @@ public function scanReturnsNullAndCapturesAllMatchingCharactersUntilFailureIfGiv expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 3), expectedBufferContents: 'AABB', + expectedRemainder: 'BCC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -180,6 +187,7 @@ public function scanCapturesMatchingCharactersIfGivenRuleDoesNotMatchButTheNextR expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 2), expectedBufferContents: 'ABC', + expectedRemainder: '', expectedIsEnd: true, actualScanner: $scanner, ); @@ -209,6 +217,7 @@ public function scanCapturesMatchingCharactersAndReturnsTheMatchingRuleIfAnyOfTh expectedBufferStart: Position::from(0, 0), expectedBufferEnd: 
Position::from(0, 2), expectedBufferContents: 'ABC', + expectedRemainder: '', expectedIsEnd: true, actualScanner: $scanner, ); @@ -241,6 +250,7 @@ public function scanReturnsNullAndCapturesAllMatchingCharactersUntilFailureIfNon expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 4), expectedBufferContents: 'AABBC', + expectedRemainder: 'CC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -268,6 +278,7 @@ public function scanReturnsNullAndCapturesAllMatchingCharactersUntilFailureIfNon expectedBufferStart: Position::from(0, 3), expectedBufferEnd: Position::from(0, 5), expectedBufferContents: 'BBB', + expectedRemainder: 'BCCC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -297,6 +308,7 @@ public function scanIfTwoCompetingRulesBothMatchAtTheSameOffsetTheFirstOneThatMa expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 1), expectedBufferContents: 'AB', + expectedRemainder: 'C', expectedIsEnd: false, actualScanner: $scanner, ); @@ -326,6 +338,7 @@ public function scanIfTwoCompetingRulesBothMatchAtDifferentOffsetsTheFirstOneTha expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 1), expectedBufferContents: 'AB', + expectedRemainder: 'C', expectedIsEnd: false, actualScanner: $scanner, ); @@ -368,6 +381,7 @@ public function dismissReturnsToLastPositionAfterScanMatch(): void expectedBufferStart: Position::from(0, 3), expectedBufferEnd: Position::from(0, 5), expectedBufferContents: 'BCC', + expectedRemainder: '', expectedIsEnd: true, actualScanner: $scanner, ); @@ -378,6 +392,7 @@ public function dismissReturnsToLastPositionAfterScanMatch(): void expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 2), expectedBufferContents: 'AAB', + expectedRemainder: 'BCC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -388,6 +403,7 @@ public function dismissReturnsToLastPositionAfterScanMatch(): void expectedBufferStart: Position::from(0, 3), 
expectedBufferEnd: Position::from(0, 5), expectedBufferContents: 'BCC', + expectedRemainder: '', expectedIsEnd: true, actualScanner: $scanner, ); @@ -415,6 +431,7 @@ public function dismissReturnsToLastPositionAfterScanMismatch(): void expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 2), expectedBufferContents: 'AAA', + expectedRemainder: 'BBBCCC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -423,7 +440,7 @@ public function dismissReturnsToLastPositionAfterScanMismatch(): void /** * @test */ - public function backspaceReturnsToLastPositionAfterScanOneOfMatch(): void + public function dismissReturnsToLastPositionAfterScanOneOfMatch(): void { $scanner = new Scanner('AABBCC'); $rule1 = RuleFixtures::withMatcher( @@ -444,6 +461,7 @@ public function backspaceReturnsToLastPositionAfterScanOneOfMatch(): void expectedBufferStart: Position::from(0, 2), expectedBufferEnd: Position::from(0, 3), expectedBufferContents: 'BB', + expectedRemainder: 'CC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -454,6 +472,7 @@ public function backspaceReturnsToLastPositionAfterScanOneOfMatch(): void expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 1), expectedBufferContents: 'AA', + expectedRemainder: 'BBCC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -462,7 +481,7 @@ public function backspaceReturnsToLastPositionAfterScanOneOfMatch(): void /** * @test */ - public function backspaceReturnsToLastPositionAfterScanOneOfMismatch(): void + public function dismissReturnsToLastPositionAfterScanMismatchWithMultipleRules(): void { $scanner = new Scanner('AAABBBCCC'); $rule1 = RuleFixtures::withMatcher( @@ -483,6 +502,7 @@ public function backspaceReturnsToLastPositionAfterScanOneOfMismatch(): void expectedBufferStart: Position::from(0, 2), expectedBufferEnd: Position::from(0, 5), expectedBufferContents: 'ABBB', + expectedRemainder: 'BCCC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -493,6 +513,7 @@ 
public function backspaceReturnsToLastPositionAfterScanOneOfMismatch(): void expectedBufferStart: Position::from(0, 0), expectedBufferEnd: Position::from(0, 1), expectedBufferContents: 'AA', + expectedRemainder: 'ABBBCCC', expectedIsEnd: false, actualScanner: $scanner, ); @@ -503,6 +524,7 @@ public static function assertScannerState( Position $expectedBufferStart, Position $expectedBufferEnd, string $expectedBufferContents, + string $expectedRemainder, bool $expectedIsEnd, ScannerInterface $actualScanner, ): void { @@ -514,6 +536,11 @@ public static function assertScannerState( message: 'Buffer of scanner was incorrect' ); + self::assertEquals( + $expectedRemainder, + $actualScanner->getRemainder(), + 'Remaining source of scanner was incorrect' + ); self::assertEquals( $expectedIsEnd, $actualScanner->isEnd(), From d7e1799279030cfe7503faaae8d5d33862ba21f3 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 18 Aug 2023 16:53:05 +0200 Subject: [PATCH 18/19] TASK: Parse negative integer literals --- .../Parser/Expression/ExpressionParser.php | 2 + .../IntegerLiteral/IntegerLiteralParser.php | 14 ++- .../Expression/ExpressionParserTest.php | 92 +++++++++++++++++++ .../IntegerLiteralParserTest.php | 80 ++++++++++++++++ 4 files changed, 186 insertions(+), 2 deletions(-) diff --git a/src/Language/Parser/Expression/ExpressionParser.php b/src/Language/Parser/Expression/ExpressionParser.php index 237ceed..0f3ca23 100644 --- a/src/Language/Parser/Expression/ExpressionParser.php +++ b/src/Language/Parser/Expression/ExpressionParser.php @@ -67,6 +67,7 @@ final class ExpressionParser Rule::KEYWORD_NULL, Rule::KEYWORD_MATCH, Rule::STRING_LITERAL_DELIMITER, + Rule::SYMBOL_DASH, Rule::INTEGER_HEXADECIMAL, Rule::INTEGER_DECIMAL, Rule::INTEGER_OCTAL, @@ -154,6 +155,7 @@ private function parseUnaryStatement(Lexer $lexer): ExpressionNode $this->parseNullLiteral($lexer), Rule::STRING_LITERAL_DELIMITER => $this->parseStringLiteral($lexer), + Rule::SYMBOL_DASH, 
Rule::INTEGER_HEXADECIMAL, Rule::INTEGER_DECIMAL, Rule::INTEGER_OCTAL, diff --git a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php index 583d2f3..ead23b7 100644 --- a/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php +++ b/src/Language/Parser/IntegerLiteral/IntegerLiteralParser.php @@ -43,12 +43,22 @@ final class IntegerLiteralParser public function parse(Lexer $lexer): IntegerLiteralNode { try { + $start = null; + $value = ''; + + if ($lexer->probe(Rule::SYMBOL_DASH)) { + $start = $lexer->buffer->getStart(); + $value = $lexer->buffer->getContents(); + } + $rule = $lexer->read(...self::RULES_INTEGER_FORMATS); + $start ??= $lexer->buffer->getStart(); + $value .= $lexer->buffer->getContents(); return new IntegerLiteralNode( - rangeInSource: $lexer->buffer->getRange(), + rangeInSource: $start->toRange($lexer->buffer->getEnd()), format: $this->getIntegerFormatFromToken($rule), - value: $lexer->buffer->getContents() + value: $value ); } catch (LexerException $e) { throw IntegerLiteralCouldNotBeParsed::becauseOfLexerException($e); diff --git a/test/Unit/Language/Parser/Expression/ExpressionParserTest.php b/test/Unit/Language/Parser/Expression/ExpressionParserTest.php index bf306e9..1820ccf 100644 --- a/test/Unit/Language/Parser/Expression/ExpressionParserTest.php +++ b/test/Unit/Language/Parser/Expression/ExpressionParserTest.php @@ -881,6 +881,29 @@ public function parsesBinaryIntegerLiteral(): void ); } + /** + * @test + */ + public function parsesNegativeBinaryIntegerLiteral(): void + { + $expressionParser = new ExpressionParser(); + $lexer = new Lexer('-0b1001'); + + $expectedExpressioNode = new ExpressionNode( + rangeInSource: $this->range([0, 0], [0, 6]), + root: new IntegerLiteralNode( + rangeInSource: $this->range([0, 0], [0, 6]), + format: IntegerFormat::BINARY, + value: '-0b1001' + ) + ); + + $this->assertEquals( + $expectedExpressioNode, + $expressionParser->parse($lexer) + ); 
+ } + /** * @test */ @@ -904,6 +927,29 @@ public function parsesOctalIntegerLiteral(): void ); } + /** + * @test + */ + public function parsesNegativeOctalIntegerLiteral(): void + { + $expressionParser = new ExpressionParser(); + $lexer = new Lexer('-0o755'); + + $expectedExpressioNode = new ExpressionNode( + rangeInSource: $this->range([0, 0], [0, 5]), + root: new IntegerLiteralNode( + rangeInSource: $this->range([0, 0], [0, 5]), + format: IntegerFormat::OCTAL, + value: '-0o755' + ) + ); + + $this->assertEquals( + $expectedExpressioNode, + $expressionParser->parse($lexer) + ); + } + /** * @test */ @@ -927,6 +973,29 @@ public function parsesDecimalIntegerLiteral(): void ); } + /** + * @test + */ + public function parsesNegativeDecimalIntegerLiteral(): void + { + $expressionParser = new ExpressionParser(); + $lexer = new Lexer('-42'); + + $expectedExpressioNode = new ExpressionNode( + rangeInSource: $this->range([0, 0], [0, 2]), + root: new IntegerLiteralNode( + rangeInSource: $this->range([0, 0], [0, 2]), + format: IntegerFormat::DECIMAL, + value: '-42' + ) + ); + + $this->assertEquals( + $expectedExpressioNode, + $expressionParser->parse($lexer) + ); + } + /** * @test */ @@ -950,6 +1019,29 @@ public function parsesHexadecimalIntegerLiteral(): void ); } + /** + * @test + */ + public function parsesNegativeHexadecimalIntegerLiteral(): void + { + $expressionParser = new ExpressionParser(); + $lexer = new Lexer('-0xABC'); + + $expectedExpressioNode = new ExpressionNode( + rangeInSource: $this->range([0, 0], [0, 5]), + root: new IntegerLiteralNode( + rangeInSource: $this->range([0, 0], [0, 5]), + format: IntegerFormat::HEXADECIMAL, + value: '-0xABC' + ) + ); + + $this->assertEquals( + $expectedExpressioNode, + $expressionParser->parse($lexer) + ); + } + /** * @test */ diff --git a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php index 99f4a8e..55570ae 100644 --- 
a/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php +++ b/test/Unit/Language/Parser/IntegerLiteral/IntegerLiteralParserTest.php @@ -53,6 +53,26 @@ public function parsesBinaryInteger(): void ); } + /** + * @test + */ + public function parsesNegativeBinaryInteger(): void + { + $integerLiteralParser = IntegerLiteralParser::singleton(); + $lexer = new Lexer('-0b1010110101'); + + $expectedIntegerLiteralNode = new IntegerLiteralNode( + rangeInSource: $this->range([0, 0], [0, 12]), + format: IntegerFormat::BINARY, + value: '-0b1010110101' + ); + + $this->assertEquals( + $expectedIntegerLiteralNode, + $integerLiteralParser->parse($lexer) + ); + } + /** * @test */ @@ -73,6 +93,26 @@ public function parsesOctalInteger(): void ); } + /** + * @test + */ + public function parsesNegativeOctalInteger(): void + { + $integerLiteralParser = IntegerLiteralParser::singleton(); + $lexer = new Lexer('-0o755'); + + $expectedIntegerLiteralNode = new IntegerLiteralNode( + rangeInSource: $this->range([0, 0], [0, 5]), + format: IntegerFormat::OCTAL, + value: '-0o755' + ); + + $this->assertEquals( + $expectedIntegerLiteralNode, + $integerLiteralParser->parse($lexer) + ); + } + /** * @test */ @@ -93,6 +133,26 @@ public function parsesDecimalInteger(): void ); } + /** + * @test + */ + public function parsesNegativeDecimalInteger(): void + { + $integerLiteralParser = IntegerLiteralParser::singleton(); + $lexer = new Lexer('-1234567890'); + + $expectedIntegerLiteralNode = new IntegerLiteralNode( + rangeInSource: $this->range([0, 0], [0, 10]), + format: IntegerFormat::DECIMAL, + value: '-1234567890' + ); + + $this->assertEquals( + $expectedIntegerLiteralNode, + $integerLiteralParser->parse($lexer) + ); + } + /** * @test */ @@ -113,6 +173,26 @@ public function parsesHexadecimalInteger(): void ); } + /** + * @test + */ + public function parsesNegativeHexadecimalInteger(): void + { + $integerLiteralParser = IntegerLiteralParser::singleton(); + $lexer = new 
Lexer('-0x123456789ABCDEF'); + + $expectedIntegerLiteralNode = new IntegerLiteralNode( + rangeInSource: $this->range([0, 0], [0, 17]), + format: IntegerFormat::HEXADECIMAL, + value: '-0x123456789ABCDEF' + ); + + $this->assertEquals( + $expectedIntegerLiteralNode, + $integerLiteralParser->parse($lexer) + ); + } + /** * @test */ From 9fc4801fa94f62458f094caaa1a9a3c4641c7ee4 Mon Sep 17 00:00:00 2001 From: Wilhelm Behncke Date: Fri, 18 Aug 2023 16:56:13 +0200 Subject: [PATCH 19/19] BUGFIX: Fix shebang in scripts --- scripts/analyse | 2 +- scripts/test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/analyse b/scripts/analyse index a80c795..43a0ee1 100755 --- a/scripts/analyse +++ b/scripts/analyse @@ -1,4 +1,4 @@ -#!/bin/env bash +#!/usr/bin/env bash ## ## Usage (plain): diff --git a/scripts/test b/scripts/test index d5006e4..7d1382a 100755 --- a/scripts/test +++ b/scripts/test @@ -1,4 +1,4 @@ -#!/bin/env bash +#!/usr/bin/env bash ## ## Usage (plain):