From d117fc20cbca201b0e0084dce07261406e8e0842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:15:30 +0200 Subject: [PATCH] Rework error matching --- .../test/scala/playground/Assertions.scala | 2 +- .../smithyql/parser/v2/scanner.scala | 45 ++++++++++++------- .../smithyql/parser/v2/ScannerTests.scala | 26 +++++++++-- 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/modules/ast/src/test/scala/playground/Assertions.scala b/modules/ast/src/test/scala/playground/Assertions.scala index 9dd6bed4..23207841 100644 --- a/modules/ast/src/test/scala/playground/Assertions.scala +++ b/modules/ast/src/test/scala/playground/Assertions.scala @@ -23,7 +23,7 @@ object Assertions extends Expectations.Helpers { val stringWithResets = d.show()(conf).linesWithSeparators.map(Console.RESET + _).mkString failure( - s"Diff failed:\n${Console.RESET}(${conf.right("expected")}, ${conf.left("actual")})\n\n" + stringWithResets + s"Diff failed:\n${Console.RESET}(${conf.left("expected")}, ${conf.right("actual")})\n\n" + stringWithResets ) } diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 58cfac48..6fea2b15 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -66,9 +66,10 @@ object Scanner { def readSimple( token: Char, tok: TokenKind, - ): PartialFunction[Char, Unit] = { case `token` => - add(tok(token.toString)) - remaining = remaining.tail + ): PartialFunction[Unit, Unit] = { + case _ if remaining.startsWith(token.toString()) => + add(tok(token.toString)) + remaining = remaining.drop(token.toString().length()) } def simpleTokens( @@ -76,11 +77,9 @@ object Scanner { Char, TokenKind, )* - ): PartialFunction[Char, Unit] = pairings - .map(readSimple.tupled) - .reduce(_ orElse _) + ): PartialFunction[Unit, Unit] = pairings.map(readSimple.tupled).reduce(_.orElse(_)) - val readOne: PartialFunction[Char, Unit] = simpleTokens( + def readOne: PartialFunction[Unit, Unit] = simpleTokens( '.' -> TokenKind.DOT, ',' -> TokenKind.COMMA, '#' -> TokenKind.HASH, @@ -91,7 +90,7 @@ object Scanner { ':' -> TokenKind.COLON, '=' -> TokenKind.EQ, ).orElse { - case letter if letter.isLetter => + case _ if remaining.head.isLetter => val (letters, rest) = remaining.span(ch => ch.isLetterOrDigit || ch == '_') add(TokenKind.IDENT(letters)) remaining = rest @@ -145,8 +144,17 @@ object Scanner { ) = { // todo: bug: even if the next character starts a multi-char token, this will consider it an error. // instead, we should rework "readOne" to consume arbitrary constant-length tokens, and also include the possibility that `rest` has comments or whitespace. - val (failures, rest) = remaining.span(!readOne.isDefinedAt(_)) - remaining = rest + val (failures, _) = remaining.span { _ => + if (readOne.isDefinedAt(())) + // this will match. stop! + false + else { + // didn't match. We need to move the cursor manually here + remaining = remaining.tail + true + } + } + if (failures.nonEmpty) { add(TokenKind.Error(failures)) true @@ -157,13 +165,18 @@ object Scanner { while (remaining.nonEmpty) { val last = remaining - readOne.applyOrElse[Char, Any]( - remaining.head, - (_: Char) => - // nothing matched. Eat whitespace and see if the rest is an error - eatWhitespace() || eatComments() || eatErrors(), - ) + { + val matched = readOne.isDefinedAt(()) + if (matched) + readOne(()) + + matched + } || + eatWhitespace() || + eatComments() || + eatErrors(): Unit + // last-effort sanity check if (remaining == last) sys.error(s"no progress in the last run! remaining string: $remaining") } diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index 36d2c4cf..f15b793d 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -2,8 +2,10 @@ package playground.smithyql.parser.v2 import cats.effect.IO import cats.implicits._ +import com.softwaremill.diffx.Diff import org.scalacheck.Arbitrary import org.scalacheck.Gen +import playground.Assertions import playground.smithyql.parser.v2.scanner.Scanner import playground.smithyql.parser.v2.scanner.Token import playground.smithyql.parser.v2.scanner.TokenKind @@ -14,6 +16,9 @@ import Scanner.scan object ScannerTests extends SimpleIOSuite with Checkers { + implicit val tokenKindDiff: Diff[TokenKind] = Diff.derived + implicit val tokenDiff: Diff[Token] = Diff.derived + def arbTests( name: TestName )( @@ -51,17 +56,19 @@ object ScannerTests extends SimpleIOSuite with Checkers { private def scanTest( input: String, - explicitName: String = "", + explicitName: TestName = "", )( expected: List[Token] + )( + implicit loc: SourceLocation ): Unit = pureTest( - if (explicitName.nonEmpty) + if (explicitName.name.nonEmpty) explicitName else "Scan string: " + sanitize(input) ) { - assert.eql(expected, scan(input)) + Assertions.assertNoDiff(scan(input), expected) } // Runs scanTest by first rendering the expected tokens to a string, then scanning it to get them back. @@ -72,6 +79,8 @@ object ScannerTests extends SimpleIOSuite with Checkers { explicitName: String )( expected: List[Token] + )( + implicit loc: SourceLocation ): Unit = scanTest(expected.foldMap(_.text), explicitName)(expected) private def sanitize( @@ -162,6 +171,17 @@ object ScannerTests extends SimpleIOSuite with Checkers { scanTest( explicitName = "Error tokens mixed between other tokens", + input = "hello@world", + )( + List( + TokenKind.IDENT("hello"), + TokenKind.Error("@"), + TokenKind.IDENT("world"), + ) + ) + + scanTest( + explicitName = "Error tokens mixed between other tokens - complex", input = "hello@world-this?is=an