Призрак и три пирата (Ghost and the Three Pirates)
# Main code for the environment where the ghost fights the pirates
import numpy as np
import gym
from gym import spaces


class GoLeftEnv(gym.Env):
    """
    Custom Environment that follows the gym interface.
    A simple env where the agent (a ghost) must defeat three pirates
    before its own health runs out.
    """
    # Because of Google Colab, we cannot implement the GUI ('human' render mode)
    metadata = {'render.modes': ['console']}
    # Define constants for clearer code: each action attacks one pirate
    ATTACK = 0
    ATTACK2 = 1
    ATTACK3 = 2

    def __init__(self, grid_size=100):
        super(GoLeftEnv, self).__init__()
        # Size of the 1D-grid (kept from the original template; unused by the combat logic)
        self.grid_size = grid_size
        self.agent_pos = grid_size - 1
        # Initial health values; reset() re-initializes them before each episode
        self.ghost = 64
        self.firstpiratehealth = 10
        self.secondpiratehealth = 10
        self.thirdpiratehealth = 10
        # Define action and observation space; they must be gym.spaces objects.
        # Three discrete actions: attack the first, second, or third pirate.
        n_actions = 3
        self.action_space = spaces.Discrete(n_actions)
        # The observation is the health of the ghost and of the three pirates
        self.observation_space = spaces.Box(low=-100, high=100,
                                            shape=(4,), dtype=np.float32)

    def reset(self):
        """
        Important: the observation must be a numpy array
        :return: (np.array)
        """
        # Restore everyone's health at the start of an episode
        self.ghost = 64
        self.firstpiratehealth = 10
        self.secondpiratehealth = 10
        self.thirdpiratehealth = 10
        # Convert to float32 to make it more general (in case we want to use continuous actions)
        return np.array([self.ghost, self.firstpiratehealth,
                         self.secondpiratehealth, self.thirdpiratehealth]).astype(np.float32)

    def step(self, action):
        # The chosen pirate takes 2 points of damage
        if action == self.ATTACK:
            self.firstpiratehealth -= 2
        elif action == self.ATTACK2:
            self.secondpiratehealth -= 2
        elif action == self.ATTACK3:
            self.thirdpiratehealth -= 2
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))
        # Every pirate that is still alive strikes back for 2 points
        if self.firstpiratehealth > 0:
            self.ghost -= 2
        if self.secondpiratehealth > 0:
            self.ghost -= 2
        if self.thirdpiratehealth > 0:
            self.ghost -= 2
        # The episode ends when all pirates are dead or the ghost is dead
        done = bool((self.secondpiratehealth <= 0 and self.firstpiratehealth <= 0
                     and self.thirdpiratehealth <= 0) or self.ghost <= 0)
        # Reward only a full victory: all pirates defeated while the ghost survives
        if (self.secondpiratehealth <= 0 and self.firstpiratehealth <= 0
                and self.thirdpiratehealth <= 0 and self.ghost > 0):
            reward = 100
        else:
            reward = 0
        # Optionally we can pass additional info; we are not using that for now
        info = {}
        return (np.array([self.ghost, self.firstpiratehealth,
                          self.secondpiratehealth, self.thirdpiratehealth]).astype(np.float32),
                reward, done, info)

    def render(self, mode='console'):
        if mode != 'console':
            raise NotImplementedError()
        print("Ghost health -", self.ghost)
        print("First pirate health -", self.firstpiratehealth)
        print("Second pirate health -", self.secondpiratehealth)
        print("Third pirate health -", self.thirdpiratehealth)

    def close(self):
        pass
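

# A minimal usage sketch (not part of the original file): it exercises the
# environment above with random actions, a quick sanity check of the
# reset/step/render cycle before plugging the env into an RL library.
if __name__ == "__main__":
    env = GoLeftEnv()
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        # Sample a random pirate to attack
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        total_reward += reward
    env.render()
    print("Episode finished, total reward:", total_reward)
    # Optionally, if stable-baselines3 is installed (an assumption, it is not
    # imported by the original file), its checker can validate the gym interface:
    # from stable_baselines3.common.env_checker import check_env
    # check_env(env, warn=True)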