Skip to content

Commit

Permalink
Merge pull request #86 from sumnerevans/decode-pgn-fixes
Browse files Browse the repository at this point in the history
decodePGN: convert to a more resilient parser
  • Loading branch information
notnil authored Dec 18, 2021
2 parents b6d4fbd + 7301e8b commit d773447
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 70 deletions.
1 change: 1 addition & 0 deletions fixtures/pgns/0008.pgn
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.e4 e6 2.d4 d5
1 change: 1 addition & 0 deletions fixtures/pgns/0009.pgn
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1. e4 e5 2. Nf3 Nc6 3. Bb5 a6 {This opening is called the Ruy Lopez.}
1 change: 1 addition & 0 deletions fixtures/pgns/0010.pgn
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1. e4 e5 2. Nf3 Nc6 3. Bb5 a6{This opening is called the Ruy Lopez.}
80 changes: 19 additions & 61 deletions pgn.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ func (a multiDecoder) Decode(pos *Position, s string) (*Move, error) {
func decodePGN(pgn string) (*Game, error) {
tagPairs := getTagPairs(pgn)
moveComments, outcome := moveListWithComments(pgn)
// moveStrs, outcome := moveList(pgn)
gameFuncs := []func(*Game){}
for _, tp := range tagPairs {
if strings.ToLower(tp.Key) == "fen" {
Expand Down Expand Up @@ -229,74 +228,33 @@ type moveWithComment struct {
Comments []string
}

// moveListTokenRe tokenizes PGN movetext. Each match is exactly one of:
//   - a move number such as "12." (no capture group set)
//   - a move: castling ("O-O", "O-O-O") or any token containing a target
//     square, optionally followed by a promotion to Q, R, B or N (group 1)
//   - a brace comment "{...}"; the text between the braces is group 2
//   - a parenthesised variation "(...)" (no capture group set)
//   - a game outcome: "*", "0-1", "1-0" or "1/2-1/2" (group 3)
//
// Check and mate suffixes ("+", "#") are not part of the captured move.
var moveListTokenRe = regexp.MustCompile(`(?:\d+\.)|(O-O(?:-O)?|\w*[abcdefgh][12345678]\w*(?:=[QRBN])?)|(?:\{([^}]*)\})|(?:\([^)]*\))|(\*|0-1|1-0|1\/2-1\/2)`)

func moveListWithComments(pgn string) ([]moveWithComment, Outcome) {
text := stripTagPairs(pgn)
// remove variations
text = removeSection(`\(`, `\)`, text)
text = strings.Replace(text, "\n", " ", -1)
text = strings.TrimSpace(text)
tokens := strings.Split(text, " ")
pgn = stripTagPairs(pgn)
var outcome Outcome
moves := []moveWithComment{}
inComment := false
commentTokens := []string{}
tokenLoop:
for _, token := range tokens {
token = strings.TrimSpace(token)
switch token {
case "{":
inComment = true
commentTokens = []string{}
case "}":
inComment = false
if len(moves) > 0 {
moves[len(moves)-1].Comments = append(moves[len(moves)-1].Comments, strings.Join(commentTokens, " "))
}
case "":
case string(NoOutcome), string(WhiteWon), string(BlackWon), string(Draw):
outcome = Outcome(token)
break tokenLoop
default:
if inComment {
commentTokens = append(commentTokens, token)
break
}
if strings.HasSuffix(token, ".") {
break
}
moves = append(moves, moveWithComment{MoveStr: token})

for _, match := range moveListTokenRe.FindAllStringSubmatch(pgn, -1) {
move, commentText, outcomeText := match[1], match[2], match[3]
if len(move+commentText+outcomeText) == 0 {
continue
}
}
return moves, outcome
}

func moveList(pgn string) ([]string, Outcome) {
// remove comments
text := removeSection("{", "}", pgn)
// remove variations
text = removeSection(`\(`, `\)`, text)
// remove tag pairs
text = removeSection(`\[`, `\]`, text)
// remove line breaks
text = strings.Replace(text, "\n", " ", -1)
if outcomeText != "" {
outcome = Outcome(outcomeText)
break
}

list := strings.Split(text, " ")
filtered := []string{}
var outcome Outcome
for _, move := range list {
move = strings.TrimSpace(move)
switch move {
case string(NoOutcome), string(WhiteWon), string(BlackWon), string(Draw):
outcome = Outcome(move)
case "":
default:
results := moveNumRegex.FindStringSubmatch(move)
if len(results) == 2 && results[1] != "" {
filtered = append(filtered, results[1])
}
// A comment is attached to the most recent move. A comment that appears
// before any move has nothing to attach to, so it is dropped rather than
// indexing into an empty slice (which would panic).
if commentText != "" && len(moves) > 0 {
	moves[len(moves)-1].Comments = append(moves[len(moves)-1].Comments, strings.TrimSpace(commentText))
}

if move != "" {
moves = append(moves, moveWithComment{MoveStr: move})
}
}
return filtered, outcome
return moves, outcome
}

func removeSection(leftChar, rightChar, s string) string {
Expand Down
56 changes: 47 additions & 9 deletions pgn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ var (
PostPos: unsafeFEN("r3kb1r/2qp1pp1/b1n1p2p/pp2P3/5n1B/1PPQ1N2/P1BN1PPP/R3K2R w KQkq - 1 14"),
PGN: mustParsePGN("fixtures/pgns/0004.pgn"),
},
{
PostPos: unsafeFEN("rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKBNR w KQkq d6 0 3"),
PGN: mustParsePGN("fixtures/pgns/0008.pgn"),
},
{
PostPos: unsafeFEN("r1bqkbnr/1ppp1ppp/p1n5/1B2p3/4P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 0 4"),
PGN: mustParsePGN("fixtures/pgns/0009.pgn"),
},
{
PostPos: unsafeFEN("r1bqkbnr/1ppp1ppp/p1n5/1B2p3/4P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 0 4"),
PGN: mustParsePGN("fixtures/pgns/0010.pgn"),
},
}
)

Expand All @@ -47,16 +59,42 @@ func TestValidPGNs(t *testing.T) {
}
}

func TestCommentsDetection(t *testing.T) {
pgn := mustParsePGN("fixtures/pgns/0005.pgn")
game, err := decodePGN(pgn)
if err != nil {
t.Fatal(err)
// commentTest is one case for TestCommentsDetection: a PGN text to decode,
// the index into the decoded game's Comments() to inspect, and the comment
// text expected there once that entry's comments are joined with spaces.
type commentTest struct {
PGN string
MoveNumber int
CommentText string
}

var (
commentTests = []commentTest{
{
PGN: mustParsePGN("fixtures/pgns/0005.pgn"),
MoveNumber: 7,
CommentText: `(-0.25 → 0.39) Inaccuracy. cxd4 was best. [%eval 0.39] [%clk 0:05:05]`,
},
{
PGN: mustParsePGN("fixtures/pgns/0009.pgn"),
MoveNumber: 5,
CommentText: `This opening is called the Ruy Lopez.`,
},
{
PGN: mustParsePGN("fixtures/pgns/0010.pgn"),
MoveNumber: 5,
CommentText: `This opening is called the Ruy Lopez.`,
},
}
comment := strings.Join(game.Comments()[7], " ")
expected := `Inaccuracy. cxd4 was best. [%eval 0.39] [%clk 0:05:05]`
if comment != expected {
t.Fatalf("expected pgn comment to be %s but got %s", expected, comment)
)

// TestCommentsDetection decodes every PGN in commentTests and checks that the
// comments stored at index MoveNumber, joined with single spaces, equal the
// expected CommentText. The test stops at the first decode error or mismatch.
func TestCommentsDetection(t *testing.T) {
for _, test := range commentTests {
game, err := decodePGN(test.PGN)
if err != nil {
t.Fatal(err)
}
comment := strings.Join(game.Comments()[test.MoveNumber], " ")
if comment != test.CommentText {
t.Fatalf("expected pgn comment to be %s but got %s", test.CommentText, comment)
}
}
}

Expand Down

0 comments on commit d773447

Please sign in to comment.