From 7301e8b8fe096bec62a62d4dd233b9f7a200e568 Mon Sep 17 00:00:00 2001 From: Sumner Evans Date: Sun, 12 Dec 2021 16:28:58 -0700 Subject: [PATCH] decodePGN: convert to a more resillient parser There were two main bugs that this fixes. * Fixes #77: fixed parsing of PGNs where there is no space after the move number indicator. This used to work, but #76 caused a regression. * Fixes #78: using the regular expression allows for comments that don't have spaces separating the `{` and `}` tokens. --- fixtures/pgns/0008.pgn | 1 + fixtures/pgns/0009.pgn | 1 + fixtures/pgns/0010.pgn | 1 + pgn.go | 80 ++++++++++-------------------------------- pgn_test.go | 56 ++++++++++++++++++++++++----- 5 files changed, 69 insertions(+), 70 deletions(-) create mode 100644 fixtures/pgns/0008.pgn create mode 100644 fixtures/pgns/0009.pgn create mode 100644 fixtures/pgns/0010.pgn diff --git a/fixtures/pgns/0008.pgn b/fixtures/pgns/0008.pgn new file mode 100644 index 0000000..d526f23 --- /dev/null +++ b/fixtures/pgns/0008.pgn @@ -0,0 +1 @@ +1.e4 e6 2.d4 d5 diff --git a/fixtures/pgns/0009.pgn b/fixtures/pgns/0009.pgn new file mode 100644 index 0000000..8f972c8 --- /dev/null +++ b/fixtures/pgns/0009.pgn @@ -0,0 +1 @@ +1. e4 e5 2. Nf3 Nc6 3. Bb5 a6 {This opening is called the Ruy Lopez.} diff --git a/fixtures/pgns/0010.pgn b/fixtures/pgns/0010.pgn new file mode 100644 index 0000000..065dd59 --- /dev/null +++ b/fixtures/pgns/0010.pgn @@ -0,0 +1 @@ +1. e4 e5 2. Nf3 Nc6 3. Bb5 a6{This opening is called the Ruy Lopez.} diff --git a/pgn.go b/pgn.go index a290a4b..f21e091 100644 --- a/pgn.go +++ b/pgn.go @@ -151,7 +151,6 @@ func (a multiDecoder) Decode(pos *Position, s string) (*Move, error) { func decodePGN(pgn string) (*Game, error) { tagPairs := getTagPairs(pgn) moveComments, outcome := moveListWithComments(pgn) - // moveStrs, outcome := moveList(pgn) gameFuncs := []func(*Game){} for _, tp := range tagPairs { if strings.ToLower(tp.Key) == "fen" { @@ -229,74 +228,33 @@ type moveWithComment struct { Comments []string } +var moveListTokenRe = regexp.MustCompile(`(?:\d+\.)|(O-O(?:-O)?|\w*[abcdefgh][12345678]\w*(?:=Q)?)|(?:\{([^}]*)\})|(?:\([^)]*\))|(\*|0-1|1-0|1\/2-1\/2)`) + func moveListWithComments(pgn string) ([]moveWithComment, Outcome) { - text := stripTagPairs(pgn) - // remove variations - text = removeSection(`\(`, `\)`, text) - text = strings.Replace(text, "\n", " ", -1) - text = strings.TrimSpace(text) - tokens := strings.Split(text, " ") + pgn = stripTagPairs(pgn) var outcome Outcome moves := []moveWithComment{} - inComment := false - commentTokens := []string{} -tokenLoop: - for _, token := range tokens { - token = strings.TrimSpace(token) - switch token { - case "{": - inComment = true - commentTokens = []string{} - case "}": - inComment = false - if len(moves) > 0 { - moves[len(moves)-1].Comments = append(moves[len(moves)-1].Comments, strings.Join(commentTokens, " ")) - } - case "": - case string(NoOutcome), string(WhiteWon), string(BlackWon), string(Draw): - outcome = Outcome(token) - break tokenLoop - default: - if inComment { - commentTokens = append(commentTokens, token) - break - } - if strings.HasSuffix(token, ".") { - break - } - moves = append(moves, moveWithComment{MoveStr: token}) + + for _, match := range moveListTokenRe.FindAllStringSubmatch(pgn, -1) { + move, commentText, outcomeText := match[1], match[2], match[3] + if len(move+commentText+outcomeText) == 0 { + continue } - } - return moves, outcome -} -func moveList(pgn string) ([]string, Outcome) { - // remove comments - text := removeSection("{", "}", pgn) - // remove variations - text = removeSection(`\(`, `\)`, text) - // remove tag pairs - text = removeSection(`\[`, `\]`, text) - // remove line breaks - text = strings.Replace(text, "\n", " ", -1) + if outcomeText != "" { + outcome = Outcome(outcomeText) + break + } - list := strings.Split(text, " ") - filtered := []string{} - var outcome Outcome - for _, move := range list { - move = strings.TrimSpace(move) - switch move { - case string(NoOutcome), string(WhiteWon), string(BlackWon), string(Draw): - outcome = Outcome(move) - case "": - default: - results := moveNumRegex.FindStringSubmatch(move) - if len(results) == 2 && results[1] != "" { - filtered = append(filtered, results[1]) - } + if commentText != "" { + moves[len(moves)-1].Comments = append(moves[len(moves)-1].Comments, strings.TrimSpace(commentText)) + } + + if move != "" { + moves = append(moves, moveWithComment{MoveStr: move}) } } - return filtered, outcome + return moves, outcome } func removeSection(leftChar, rightChar, s string) string { diff --git a/pgn_test.go b/pgn_test.go index b3cab18..9088c7b 100644 --- a/pgn_test.go +++ b/pgn_test.go @@ -30,6 +30,18 @@ var ( PostPos: unsafeFEN("r3kb1r/2qp1pp1/b1n1p2p/pp2P3/5n1B/1PPQ1N2/P1BN1PPP/R3K2R w KQkq - 1 14"), PGN: mustParsePGN("fixtures/pgns/0004.pgn"), }, + { + PostPos: unsafeFEN("rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKBNR w KQkq d6 0 3"), + PGN: mustParsePGN("fixtures/pgns/0008.pgn"), + }, + { + PostPos: unsafeFEN("r1bqkbnr/1ppp1ppp/p1n5/1B2p3/4P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 0 4"), + PGN: mustParsePGN("fixtures/pgns/0009.pgn"), + }, + { + PostPos: unsafeFEN("r1bqkbnr/1ppp1ppp/p1n5/1B2p3/4P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 0 4"), + PGN: mustParsePGN("fixtures/pgns/0010.pgn"), + }, } ) @@ -47,16 +59,42 @@ func TestValidPGNs(t *testing.T) { } } -func TestCommentsDetection(t *testing.T) { - pgn := mustParsePGN("fixtures/pgns/0005.pgn") - game, err := decodePGN(pgn) - if err != nil { - t.Fatal(err) +type commentTest struct { + PGN string + MoveNumber int + CommentText string +} + +var ( + commentTests = []commentTest{ + { + PGN: mustParsePGN("fixtures/pgns/0005.pgn"), + MoveNumber: 7, + CommentText: `(-0.25 → 0.39) Inaccuracy. cxd4 was best. [%eval 0.39] [%clk 0:05:05]`, + }, + { + PGN: mustParsePGN("fixtures/pgns/0009.pgn"), + MoveNumber: 5, + CommentText: `This opening is called the Ruy Lopez.`, + }, + { + PGN: mustParsePGN("fixtures/pgns/0010.pgn"), + MoveNumber: 5, + CommentText: `This opening is called the Ruy Lopez.`, + }, } - comment := strings.Join(game.Comments()[7], " ") - expected := `Inaccuracy. cxd4 was best. [%eval 0.39] [%clk 0:05:05]` - if comment != expected { - t.Fatalf("expected pgn comment to be %s but got %s", expected, comment) +) + +func TestCommentsDetection(t *testing.T) { + for _, test := range commentTests { + game, err := decodePGN(test.PGN) + if err != nil { + t.Fatal(err) + } + comment := strings.Join(game.Comments()[test.MoveNumber], " ") + if comment != test.CommentText { + t.Fatalf("expected pgn comment to be %s but got %s", test.CommentText, comment) + } } }