# some ai

import numpy as np
import gym
from gym import spaces


class GoLeftEnv(gym.Env):
  """
  Custom Environment that follows the gym interface.

  Each step the agent attacks one of two pirates. After the attack, every
  pirate that still has health left damages the ghost. The episode ends when
  the ghost's health is exhausted or when both pirates are defeated; the
  reward is 1 on a step where both pirates are defeated and 0 otherwise.

  The observation is still the (never changing) agent position on a 1D grid;
  pirate and ghost health are not part of the observation.

  NOTE(review): with the default constants the ghost runs out of health by
  step 8 at the latest, while defeating both pirates takes 10 attacks, so the
  reward of 1 looks unreachable -- confirm the intended balance.
  """
  # Only a text ('console') render mode is declared; there is no GUI mode.
  metadata = {'render.modes': ['console']}

  # Actions: which pirate to attack.
  ATTACK = 0
  ATTACK2 = 1

  # Game balance constants.
  GHOST_HEALTH = 24    # ghost health at the start of an episode
  PIRATE_HEALTH = 10   # health of each pirate at the start of an episode
  ATTACK_DAMAGE = 2    # damage per agent attack and per pirate hit on the ghost

  def __init__(self, grid_size=10):
    """
    :param grid_size: (int) size of the 1D grid; the agent starts at
      ``grid_size - 1`` and the observation is bounded by ``[0, grid_size]``.
    """
    super().__init__()

    # Size of the 1D-grid
    self.grid_size = grid_size
    self._reset_state()

    # Two discrete actions: attack the first or the second pirate.
    n_actions = 2
    self.action_space = spaces.Discrete(n_actions)
    # The observation is the coordinate of the agent.
    self.observation_space = spaces.Box(low=0, high=self.grid_size,
                                        shape=(1,), dtype=np.float32)

  def _reset_state(self):
    """Put the agent, the ghost and both pirates back to their start values."""
    self.agent_pos = self.grid_size - 1
    self.ghost = self.GHOST_HEALTH
    self.firstpiratehealth = self.PIRATE_HEALTH
    self.secondpiratehealth = self.PIRATE_HEALTH

  def _observation(self):
    """Return the agent position as a float32 array of shape (1,)."""
    return np.array([self.agent_pos]).astype(np.float32)

  def reset(self):
    """
    Start a new episode.

    Important: the observation must be a numpy array
    :return: (np.array) initial observation
    """
    self._reset_state()
    return self._observation()

  def step(self, action):
    """
    Apply one attack, then let the surviving pirates damage the ghost.

    :param action: (int) ``ATTACK`` (first pirate) or ``ATTACK2`` (second pirate)
    :return: (np.array, int, bool, dict) observation, reward, done, info
    :raises ValueError: if ``action`` is not one of the two known actions
    """
    # Health is clamped at 0 so that a defeated pirate stays defeated even
    # if it is attacked again.
    if action == self.ATTACK:
      self.firstpiratehealth = max(0, self.firstpiratehealth - self.ATTACK_DAMAGE)
    elif action == self.ATTACK2:
      self.secondpiratehealth = max(0, self.secondpiratehealth - self.ATTACK_DAMAGE)
    else:
      raise ValueError("Received invalid action={} which is not part of the action space".format(action))

    # Account for the boundaries of the grid
    self.agent_pos = np.clip(self.agent_pos, 0, self.grid_size)

    # Every pirate that is still alive hits the ghost once.
    alive_pirates = int(self.firstpiratehealth > 0) + int(self.secondpiratehealth > 0)
    self.ghost = max(0, self.ghost - self.ATTACK_DAMAGE * alive_pirates)

    pirates_defeated = self.firstpiratehealth <= 0 and self.secondpiratehealth <= 0

    # `done` is always assigned: the episode ends when the ghost is out of
    # health or when both pirates are defeated.
    done = bool(self.ghost <= 0 or pirates_defeated)

    # Null reward everywhere except when both pirates are defeated.
    reward = 1 if pirates_defeated else 0

    # Optionally we can pass additional info, we are not using that for now
    info = {}

    return self._observation(), reward, done, info

  def render(self, mode='console'):
    """
    Render the environment. Only 'console' is accepted, and it currently
    prints nothing.

    :raises NotImplementedError: for any mode other than 'console'
    """
    if mode != 'console':
      raise NotImplementedError()

  def close(self):
    """Nothing to release."""
    pass