Skip to content

Commit

Permalink
Update codes
Browse files Browse the repository at this point in the history
Introduce GitHub Action.
Add tests.
Increase PEP8 compatibility.
  • Loading branch information
ZhiqingXiao committed May 3, 2020
1 parent 4b8d0d5 commit 8dfe657
Show file tree
Hide file tree
Showing 13 changed files with 189 additions and 65 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python package

on:
push:
branches: [ master ]
pull_request:
branches: [ master ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.5, 3.6, 3.7, 3.8]

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@ boardgame2

`boardgame2` is an extension of OpenAI Gym that implements multiple two-player, zero-sum, two-dimensional board games, such as TicTacToe, Gomoku, and Reversi.


## Environments
- `Reversi-v0`
- `KInARow-v0`, as well as `Gomuku-v0` and `TicTacToe-v0`
- `Go-v0` (Experimental, end-game not fully implemented)
- `Go-v0` (Experimental, not fully implemented)

## Install

pip install --upgrade boardgame2

We support Windows, macOS, Linux, and other operating systems.


## Usage

Expand Down Expand Up @@ -61,5 +64,6 @@ This package has been published in the following book:
title = {Reinforcement Learning: Theory and {Python} Implementation},
author = {Zhiqing Xiao},
year = 2019,
month = 8,
publisher = {China Machine Press},
}
21 changes: 4 additions & 17 deletions boardgame2/env.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import sys
import copy
import itertools

from six import StringIO
import numpy as np
Expand Down Expand Up @@ -111,23 +110,20 @@ def __init__(self, board_shape,

if isinstance(board_shape, int):
board_shape = (board_shape, board_shape)
assert len(board_shape) == 2 # invalid board shape
assert len(board_shape) == 2 # invalid board shape
self.board = np.zeros(board_shape)
assert self.board.size > 1 # Invalid board shape
assert self.board.size > 1 # Invalid board shape

observation_spaces = [
spaces.Box(low=-1, high=1, shape=board_shape, dtype=np.int8),
spaces.Box(low=-1, high=1, shape=(), dtype=np.int8)]
self.observation_space = spaces.Tuple(observation_spaces)
action_spaces = [spaces.Box(low=-np.ones((2,)),
high=np.array(board_shape)-1, dtype=np.int8),]
self.action_space = spaces.Tuple(action_spaces)

self.action_space = spaces.Box(low=-np.ones((2,)),
high=np.array(board_shape)-1, dtype=np.int8)

def seed(self, seed=None):
return []


def reset(self):
"""
Reset a new game episode. See gym.Env.reset()
Expand All @@ -140,7 +136,6 @@ def reset(self):
self.player = BLACK
return self.board, self.player


def is_valid(self, state, action):
"""
Check whether the action is valid for current state.
Expand All @@ -160,7 +155,6 @@ def is_valid(self, state, action):
x, y = action
return board[x, y] == EMPTY


def get_valid(self, state):
"""
Get all valid locations for the current state.
Expand All @@ -180,7 +174,6 @@ def get_valid(self, state):
valid[x, y] = self.is_valid(state, np.array([x, y]))
return valid


def has_valid(self, state):
"""
Check whether there are valid locations for current state.
Expand All @@ -194,14 +187,12 @@ def has_valid(self, state):
has_valid : bool
"""
board = state[0]
valid = np.zeros_like(board, dtype=np.int8)
for x in range(board.shape[0]):
for y in range(board.shape[1]):
if self.is_valid(state, np.array([x, y])):
return True
return False


def get_winner(self, state):
"""
Check whether the game has ended. If so, who is the winner.
Expand All @@ -224,7 +215,6 @@ def get_winner(self, state):
return None
return np.sign(np.nansum(board))


def get_next_state(self, state, action):
"""
Get the next state.
Expand All @@ -249,7 +239,6 @@ def get_next_state(self, state, action):
board[x, y] = player
return board, -player


def next_step(self, state, action):
"""
Get the next observation, reward, done, and info.
Expand Down Expand Up @@ -280,7 +269,6 @@ def next_step(self, state, action):
action = self.PASS
return state, 0., False, {}


def step(self, action):
"""
See gym.Env.step().
Expand All @@ -301,7 +289,6 @@ def step(self, action):
self.board, self.player = next_state
return next_state, reward, done, info


def render(self, mode='human'):
"""
See gym.Env.render().
Expand Down
78 changes: 47 additions & 31 deletions boardgame2/go.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import collections

import numpy as np
import gym.spaces as spaces

from .env import EMPTY, BLACK, WHITE
from .env import BoardGameEnv
Expand All @@ -9,12 +10,24 @@

class GoJudger:


def __init__(self, komi):
"""
Parameters
----
komi : int or float the Komi defined by the rule of the game
"""
self.komi = komi


def __call__(self, board):
"""
Parameters
----
board : np.array
Returns
----
winner : BLACK or WHITE
"""
self.board = board

self.remove_dead()
Expand All @@ -31,10 +44,9 @@ def __call__(self, board):
return BLACK
return WHITE


def remove_dead(self):
pass # TODO

# TODO: The implementation of dead stone removal is difficult.
print('The dead stone removal is not implemented. All stones will be treated as live ones.')

def floodfill(self, location, player):
x, y = location
Expand All @@ -46,29 +58,27 @@ def floodfill(self, location, player):
self.floodfill((xx, yy), player)



class GoEnv(BoardGameEnv):
def __init__(self, board_shape=19, komi=0, allow_suicide=False,
illegal_action_mode='pass', render_characters='+ox'):
super().__init__(board_shape=board_shape,
illegal_action_mode=illegal_action_mode,
render_characters=render_characters)
illegal_action_mode=illegal_action_mode,
render_characters=render_characters)
self.judger = GoJudger(komi)
self.allow_suicide = allow_suicide
ko_space = spaces.Box(low=0, high=1, shape=observation_space.spaces[0].shape, dtype=np.int8)
obs_space = self.observation_space
ko_space = spaces.Box(low=0, high=1, shape=obs_space.spaces[0].shape, dtype=np.int8)
pass_space = spaces.Discrete(2)
self.observation_space = spaces.Tuple(observation_space.spaces + [ko_space, pass_space])

self.observation_space = spaces.Tuple(obs_space.spaces + [ko_space, pass_space])
print('Go is not fully implemented. Please use it at your own risk.')

def reset(self):
super().set_board()
self.board = np.zeros_like(self.board, dtype=np.int8)
self.player = BLACK
self.ko = np.zeros_like(self.board, dtype=np.int8)
self.pas = False # record pass
self.pas = False # record pass
return self.board, self.player, self.ko, self.pas


def is_valid(self, state, action):
"""
Parameters
Expand All @@ -85,18 +95,20 @@ def is_valid(self, state, action):
if is_index(board, action):
return False

x, y = action
if len(action) == 1:
action, = action
x, y = action.tolist()

if board[x, y] or ko[x, y]:
return False

if not allow_suicide:
board[x, y] = player # place
if not self.allow_suicide:
board[x, y] = self.player # place

for dx, dy in [(-1, 0), (0, -1), (1, 0), (0, 1)]:
xx, yy = x + dx, y + dy
if is_index(board, (xx, yy)):
if board[xx, yy] == -player:
if board[xx, yy] == -self.player:
_, liberties = self.search(board, (xx, yy), max_liberty=1)
if not liberties:
return True
Expand All @@ -107,7 +119,6 @@ def is_valid(self, state, action):

return True


def get_winner(self, state):
"""
Parameters
Expand All @@ -120,8 +131,13 @@ def get_winner(self, state):
- None if the game is not ended and the winner is not determined
- int the winner
"""
raise NotImplementedError()

print('End game is not implemented.')
end_game = False
if end_game:
winner = self.judger(self.board)
return winner
else:
return None

def search(self, board, location, max_liberty=float('+inf'), max_stone=float('+inf')):
# BFS
Expand All @@ -147,7 +163,6 @@ def search(self, board, location, max_liberty=float('+inf'), max_stone=float('+i
return locations, liberties
return locations, liberties


def get_next_state(self, state, action):
"""
Parameters
Expand All @@ -166,7 +181,8 @@ def get_next_state(self, state, action):
if self.is_valid(state, action):
pas = False

board[x, y] = player # place
x, y = location
board[x, y] = player # place

suicides, my_liberties = self.search(board, (x, y), max_liberty=1)

Expand All @@ -177,8 +193,8 @@ def get_next_state(self, state, action):
if board[xx, yy] == player:
deletes, liberties = self.search(board, (xx, yy), max_liberty=1)
if not liberties:
delete_count += len(locations)
for x_del, y_del in locations:
delete_count += len(deletes)
for x_del, y_del in deletes:
board[x_del, y_del] = 0
if delete_count == 1:
ko[x_del, y_del] = 1
Expand All @@ -193,7 +209,6 @@ def get_next_state(self, state, action):

return board, -player, ko, pas


def step(self, action):
"""
Parameters
Expand All @@ -204,11 +219,11 @@ def step(self, action):
----
next_state : (np.array, int, np.array, bool) next board and next player
reward : float the winner or zeros
done : bool whether the game end or not
done : bool whether the game end or not
info : {}
"""
x, y = action
if not self.valid[x, y]:
state = (self.board, self.player, self.ko, self.pas)
if not self.is_valid(state, action):
action = self.illegal_equivalent_action

if np.array_equal(action, self.RESIGN):
Expand All @@ -222,8 +237,9 @@ def step(self, action):
winner = self.get_winner(self.board)
if winner is not None:
return (self.board, self.player, self.ko, self.pas), winner, True, {}
if self.has_valid((self.board, self.player)):
state = (self.board, self.player, self.ko, self.pas)
if self.has_valid(state):
break
self.board, self.player, self.ko, self.pas = self.get_next_state(
(self.board, self.player, self.ko, self.pas), self.PASS)
state, self.PASS)
return (self.board, self.player, self.ko, self.pas), 0., False, {}
7 changes: 3 additions & 4 deletions boardgame2/kinarow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@ class KInARowEnv(BoardGameEnv):
def __init__(self, board_shape=3, target_length=3,
illegal_action_mode='pass', render_characters='+ox'):
super().__init__(board_shape=board_shape,
illegal_action_mode=illegal_action_mode,
render_characters=render_characters)
illegal_action_mode=illegal_action_mode,
render_characters=render_characters)
self.target_length = target_length


def get_winner(self, state):
"""
Parameters
Expand All @@ -30,7 +29,7 @@ def get_winner(self, state):
for player in [BLACK, WHITE]:
for x in range(board.shape[0]):
for y in range(board.shape[1]):
for dx, dy in [(1, -1), (1, 0), (1, 1), (0, 1)]: # loop on the 8 directions
for dx, dy in [(1, -1), (1, 0), (1, 1), (0, 1)]: # loop on the 8 directions
xx, yy = x, y
for count in itertools.count():
if not is_index(board, (xx, yy)) or board[xx, yy] != player:
Expand Down
Loading

0 comments on commit 8dfe657

Please sign in to comment.