Update codes

Introduce GitHub Action. Add tests. Increase PEP8 compatibility.
ZhiqingXiao · May 3, 2020 · 8dfe657 · 8dfe657
1 parent 4b8d0d5
commit 8dfe657
Show file tree

Hide file tree

Showing 13 changed files with 189 additions and 65 deletions.
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
@@ -0,0 +1,39 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Python package
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.5, 3.6, 3.7, 3.8]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest
diff --git a/README.md b/README.md
@@ -3,15 +3,18 @@ boardgame2
 
 `boardgame2` is an extension of OpenAI Gym that implements multiple two-player zero-sum two-dimensional board games, such as TicTacToe, Gomoku, and Reversi.
 
+
 ## Environments
 - `Reversi-v0`
 - `KInARow-v0`, as well as `Gomuku-v0` and `TicTacToe-v0`
-- `Go-v0` (Experimental, end-game not fully implemented)
+- `Go-v0` (Experimental, not fully implemented)
 
 ## Install
 
     pip install --upgrade boardgame2
 
+We support Windows, macOS, Linux, and other operating systems.
+
 
 ## Usage
 
@@ -61,5 +64,6 @@ This package has been published in the following book:
      title     = {Reinforcement Learning: Theory and {Python} Implementation},
      author    = {Zhiqing Xiao},
      year      = 2019,
+     month     = 8,
      publisher = {China Machine Press},
     }
diff --git a/boardgame2/env.py b/boardgame2/env.py
@@ -1,6 +1,5 @@
 import sys
 import copy
-import itertools
 
 from six import StringIO
 import numpy as np
@@ -111,23 +110,20 @@ def __init__(self, board_shape,
 
         if isinstance(board_shape, int):
             board_shape = (board_shape, board_shape)
-        assert len(board_shape) == 2 # invalid board shape
+        assert len(board_shape) == 2  # invalid board shape
         self.board = np.zeros(board_shape)
-        assert self.board.size > 1 # Invalid board shape
+        assert self.board.size > 1  # Invalid board shape
 
         observation_spaces = [
                 spaces.Box(low=-1, high=1, shape=board_shape, dtype=np.int8),
                 spaces.Box(low=-1, high=1, shape=(), dtype=np.int8)]
         self.observation_space = spaces.Tuple(observation_spaces)
-        action_spaces = [spaces.Box(low=-np.ones((2,)),
-                high=np.array(board_shape)-1, dtype=np.int8),]
-        self.action_space = spaces.Tuple(action_spaces)
-
+        self.action_space = spaces.Box(low=-np.ones((2,)),
+                high=np.array(board_shape)-1, dtype=np.int8)
 
     def seed(self, seed=None):
         return []
 
-
     def reset(self):
         """
         Reset a new game episode. See gym.Env.reset()
@@ -140,7 +136,6 @@ def reset(self):
         self.player = BLACK
         return self.board, self.player
 
-
     def is_valid(self, state, action):
         """
         Check whether the action is valid for current state.
@@ -160,7 +155,6 @@ def is_valid(self, state, action):
         x, y = action
         return board[x, y] == EMPTY
 
-
     def get_valid(self, state):
         """
         Get all valid locations for the current state.
@@ -180,7 +174,6 @@ def get_valid(self, state):
                 valid[x, y] = self.is_valid(state, np.array([x, y]))
         return valid
 
-
     def has_valid(self, state):
         """
         Check whether there are valid locations for current state.
@@ -194,14 +187,12 @@ def has_valid(self, state):
         has_valid : bool
         """
         board = state[0]
-        valid = np.zeros_like(board, dtype=np.int8)
         for x in range(board.shape[0]):
             for y in range(board.shape[1]):
                 if self.is_valid(state, np.array([x, y])):
                     return True
         return False
 
-
     def get_winner(self, state):
         """
         Check whether the game has ended. If so, who is the winner.
@@ -224,7 +215,6 @@ def get_winner(self, state):
                 return None
         return np.sign(np.nansum(board))
 
-
     def get_next_state(self, state, action):
         """
         Get the next state.
@@ -249,7 +239,6 @@ def get_next_state(self, state, action):
             board[x, y] = player
         return board, -player
 
-
     def next_step(self, state, action):
         """
         Get the next observation, reward, done, and info.
@@ -280,7 +269,6 @@ def next_step(self, state, action):
             action = self.PASS
         return state, 0., False, {}
 
-
     def step(self, action):
         """
         See gym.Env.step().
@@ -301,7 +289,6 @@ def step(self, action):
         self.board, self.player = next_state
         return next_state, reward, done, info
 
-
     def render(self, mode='human'):
         """
         See gym.Env.render().

diff --git a/boardgame2/go.py b/boardgame2/go.py
@@ -1,6 +1,7 @@
 import collections
 
 import numpy as np
+import gym.spaces as spaces
 
 from .env import EMPTY, BLACK, WHITE
 from .env import BoardGameEnv
@@ -9,12 +10,24 @@
 
 class GoJudger:
 
-
     def __init__(self, komi):
+        """
+        Parameters
+        ----
+        komi : int or float     the Komi defined by the rule of the game
+        """
         self.komi = komi
 
-
     def __call__(self, board):
+        """
+        Parameters
+        ----
+        board : np.array
+        
+        Returns
+        ----
+        winner : BLACK or WHITE
+        """
         self.board = board
 
         self.remove_dead()
@@ -31,10 +44,9 @@ def __call__(self, board):
             return BLACK
         return WHITE
 
-
     def remove_dead(self):
-        pass # TODO
-
+        # TODO: The implementation of dead stone removal is difficult.
+        print('The dead stone removal is not implemented. All stones will be treated as live ones.')
 
     def floodfill(self, location, player):
         x, y = location
@@ -46,29 +58,27 @@ def floodfill(self, location, player):
                     self.floodfill((xx, yy), player)
 
 
-
 class GoEnv(BoardGameEnv):
     def __init__(self, board_shape=19, komi=0, allow_suicide=False,
             illegal_action_mode='pass', render_characters='+ox'):
         super().__init__(board_shape=board_shape,
-            illegal_action_mode=illegal_action_mode,
-            render_characters=render_characters)
+                illegal_action_mode=illegal_action_mode,
+                render_characters=render_characters)
         self.judger = GoJudger(komi)
         self.allow_suicide = allow_suicide
-        ko_space = spaces.Box(low=0, high=1, shape=observation_space.spaces[0].shape, dtype=np.int8)
+        obs_space = self.observation_space
+        ko_space = spaces.Box(low=0, high=1, shape=obs_space.spaces[0].shape, dtype=np.int8)
         pass_space = spaces.Discrete(2)
-        self.observation_space = spaces.Tuple(observation_space.spaces + [ko_space, pass_space])
-
+        self.observation_space = spaces.Tuple(obs_space.spaces + [ko_space, pass_space])
+        print('Go is not fully implemented. Please use it at your own risk.')
 
     def reset(self):
-        super().set_board()
         self.board = np.zeros_like(self.board, dtype=np.int8)
         self.player = BLACK
         self.ko = np.zeros_like(self.board, dtype=np.int8)
-        self.pas = False # record pass
+        self.pas = False  # record pass
         return self.board, self.player, self.ko, self.pas
 
-
     def is_valid(self, state, action):
         """
         Parameters
@@ -85,18 +95,20 @@ def is_valid(self, state, action):
         if is_index(board, action):
             return False
 
-        x, y = action
+        if len(action) == 1:
+            action, = action
+        x, y = action.tolist()
 
         if board[x, y] or ko[x, y]:
             return False
 
-        if not allow_suicide:
-            board[x, y] = player # place
+        if not self.allow_suicide:
+            board[x, y] = self.player  # place
 
             for dx, dy in [(-1, 0), (0, -1), (1, 0), (0, 1)]:
                 xx, yy = x + dx, y + dy
                 if is_index(board, (xx, yy)):
-                    if board[xx, yy] == -player:
+                    if board[xx, yy] == -self.player:
                         _, liberties = self.search(board, (xx, yy), max_liberty=1)
                         if not liberties:
                             return True
@@ -107,7 +119,6 @@ def is_valid(self, state, action):
 
         return True
 
-
     def get_winner(self, state):
         """
         Parameters
@@ -120,8 +131,13 @@ def get_winner(self, state):
             - None   if the game is not ended and the winner is not determined
             - int    the winner
         """
-        raise NotImplementedError()
-
+        print('End game is not implemented.')
+        end_game = False
+        if end_game:
+            winner = self.judger(self.board)
+            return winner
+        else:
+            return None
 
     def search(self, board, location, max_liberty=float('+inf'), max_stone=float('+inf')):
         # BFS
@@ -147,7 +163,6 @@ def search(self, board, location, max_liberty=float('+inf'), max_stone=float('+i
                             return locations, liberties
         return locations, liberties
 
-
     def get_next_state(self, state, action):
         """
         Parameters
@@ -166,7 +181,8 @@ def get_next_state(self, state, action):
         if self.is_valid(state, action):
             pas = False
 
-            board[x, y] = player # place
+            x, y = location
+            board[x, y] = player  # place
 
             suicides, my_liberties = self.search(board, (x, y), max_liberty=1)
 
@@ -177,8 +193,8 @@ def get_next_state(self, state, action):
                     if board[xx, yy] == player:
                         deletes, liberties = self.search(board, (xx, yy), max_liberty=1)
                         if not liberties:
-                            delete_count += len(locations)
-                            for x_del, y_del in locations:
+                            delete_count += len(deletes)
+                            for x_del, y_del in deletes:
                                 board[x_del, y_del] = 0
                             if delete_count == 1:
                                 ko[x_del, y_del] = 1
@@ -193,7 +209,6 @@ def get_next_state(self, state, action):
 
         return board, -player, ko, pas
 
-
     def step(self, action):
         """
         Parameters
@@ -204,11 +219,11 @@ def step(self, action):
         ----
         next_state : (np.array, int, np.array, bool)    next board and next player
         reward : float               the winner or zeros
-        done : bool           whether the game end or not
+        done : bool                  whether the game has ended or not
         info : {}
         """
-        x, y = action
-        if not self.valid[x, y]:
+        state = (self.board, self.player, self.ko, self.pas)
+        if not self.is_valid(state, action):
             action = self.illegal_equivalent_action
 
         if np.array_equal(action, self.RESIGN):
@@ -222,8 +237,9 @@ def step(self, action):
             winner = self.get_winner(self.board)
             if winner is not None:
                 return (self.board, self.player, self.ko, self.pas), winner, True, {}
-            if self.has_valid((self.board, self.player)):
+            state = (self.board, self.player, self.ko, self.pas)
+            if self.has_valid(state):
                 break
             self.board, self.player, self.ko, self.pas = self.get_next_state(
-                    (self.board, self.player, self.ko, self.pas), self.PASS)
+                    state, self.PASS)
         return (self.board, self.player, self.ko, self.pas), 0., False, {}
diff --git a/boardgame2/kinarow.py b/boardgame2/kinarow.py
@@ -9,11 +9,10 @@ class KInARowEnv(BoardGameEnv):
     def __init__(self, board_shape=3, target_length=3,
             illegal_action_mode='pass', render_characters='+ox'):
         super().__init__(board_shape=board_shape,
-            illegal_action_mode=illegal_action_mode,
-            render_characters=render_characters)
+                illegal_action_mode=illegal_action_mode,
+                render_characters=render_characters)
         self.target_length = target_length
 
-
     def get_winner(self, state):
         """
         Parameters
@@ -30,7 +29,7 @@ def get_winner(self, state):
         for player in [BLACK, WHITE]:
             for x in range(board.shape[0]):
                 for y in range(board.shape[1]):
-                    for dx, dy in [(1, -1), (1, 0), (1, 1), (0, 1)]: # loop on the 8 directions
+                    for dx, dy in [(1, -1), (1, 0), (1, 1), (0, 1)]:  # loop on the 8 directions
                         xx, yy = x, y
                         for count in itertools.count():
                             if not is_index(board, (xx, yy)) or board[xx, yy] != player: