# First gridworld

# Main code: the environment in which the ghost fights the pirates

import numpy as np
import gym
from gym import spaces


class GoLeftEnv(gym.Env):
    """
    Custom Environment that follows the gym interface.

    Despite its (historical) name, this is a small 2D navigation task: the
    agent starts at coordinates (1, 1) and an episode ends when it reaches
    the goal cell (GOAL_X, GOAL_Y). Movement actions change one coordinate
    by one and are ignored when they would leave the
    [COORD_MIN, COORD_MAX] range.

    The ghost / pirate health and damage values are part of the observation,
    but the three attack actions are currently no-ops, so those values never
    change during an episode.

    Observation: float32 array of shape (5, 2) with rows
        [ghost health, ghost damage],
        [first pirate health, first pirate damage],
        [second pirate health, second pirate damage],
        [third pirate health, third pirate damage],
        [x coordinate, y coordinate].

    Reward: GOAL_REWARD on the step that reaches the goal, STEP_REWARD on
    every other step.
    """
    # Because of google colab, we cannot implement the GUI ('human' render mode)
    metadata = {'render.modes': ['console']}

    # Action codes
    ATTACK = 0
    ATTACK2 = 1
    ATTACK3 = 2
    RIGHT = 3
    LEFT = 4
    HIGH = 5
    LOW = 6

    # Inclusive bounds for both coordinates
    COORD_MIN = 0
    COORD_MAX = 9
    # Cell that terminates the episode
    GOAL_X = 5
    GOAL_Y = 5
    # Rewards
    GOAL_REWARD = 1000
    STEP_REWARD = -1

    def __init__(self, grid_size=100):
        """
        :param grid_size: (int) stored as ``self.grid_size`` and used to set
            ``self.agent_pos``; it does not affect the movement bounds,
            which are fixed by COORD_MIN / COORD_MAX.
        """
        super(GoLeftEnv, self).__init__()

        self.grid_size = grid_size
        # Kept for backward compatibility; not used by step() or reset().
        self.agent_pos = grid_size - 1
        self._reset_state()

        # Action and observation spaces must be gym.spaces objects.
        n_actions = 7
        self.action_space = spaces.Discrete(n_actions)
        self.observation_space = spaces.Box(low=-100, high=100,
                                            shape=(5, 2), dtype=np.float32)

    def _reset_state(self):
        """Set every per-episode attribute to its initial value."""
        self.ghost = 36
        self.firstpiratehealth = 10
        self.secondpiratehealth = 10
        self.thirdpiratehealth = 10
        self.ghostdamage = 2
        self.firstpiratedamage = 2
        self.secondpiratedamage = 2
        self.thirdpiratedamage = 2
        self.coordinatex = 1
        self.coordinatey = 1

    def _get_obs(self):
        """Build the (5, 2) float32 observation from the current state."""
        return np.array([
            [self.ghost, self.ghostdamage],
            [self.firstpiratehealth, self.firstpiratedamage],
            [self.secondpiratehealth, self.secondpiratedamage],
            [self.thirdpiratehealth, self.thirdpiratedamage],
            [self.coordinatex, self.coordinatey],
        ]).astype(np.float32)

    def reset(self):
        """
        Restore the initial state and return the first observation.

        Important: the observation must be a numpy array
        :return: (np.array) float32 observation of shape (5, 2)
        """
        self._reset_state()
        return self._get_obs()

    def step(self, action):
        """
        Apply one action and advance the environment.

        :param action: one of the action codes defined on the class
        :return: (observation, reward, done, info) where ``done`` is True
            exactly when the agent stands on the goal cell
        :raises ValueError: if ``action`` is not a known action code
        """
        if (action == self.ATTACK or action == self.ATTACK2
                or action == self.ATTACK3):
            # Combat is currently disabled: attacks leave all health
            # values untouched.
            pass
        elif action == self.RIGHT:
            if self.coordinatex < self.COORD_MAX:
                self.coordinatex += 1
        elif action == self.LEFT:
            if self.coordinatex > self.COORD_MIN:
                self.coordinatex -= 1
        elif action == self.HIGH:
            if self.coordinatey < self.COORD_MAX:
                self.coordinatey += 1
        elif action == self.LOW:
            if self.coordinatey > self.COORD_MIN:
                self.coordinatey -= 1
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))

        done = bool(self.coordinatey == self.GOAL_Y
                    and self.coordinatex == self.GOAL_X)
        reward = self.GOAL_REWARD if done else self.STEP_REWARD

        # Optionally we can pass additional info, we are not using that for now
        info = {}

        return self._get_obs(), reward, done, info

    def render(self, mode='console'):
        """
        Print the current state to stdout.

        :param mode: only 'console' is supported
        :raises NotImplementedError: for any other mode; raised before
            anything is printed
        """
        # Validate first so an unsupported mode does not produce partial output.
        if mode != 'console':
            raise NotImplementedError()

        lines = (
            ("Здоровье призрака - ", self.ghost),
            ("Здоровье первого пирата - ", self.firstpiratehealth),
            ("Здоровье второго пирата - ", self.secondpiratehealth),
            ("Здоровье третьего пирата - ", self.thirdpiratehealth),
            ("Координата X", self.coordinatex),
            ("Координата Y", self.coordinatey),
        )
        for label, value in lines:
            print(label)
            print(value)

    def close(self):
        """Nothing to release; present to satisfy the gym interface."""
        pass