-
Notifications
You must be signed in to change notification settings - Fork 29
/
atari_environment.py
97 lines (75 loc) · 3.08 KB
/
atari_environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import numpy as np
import os
import random
from state import State
from ale_python_interface import ALEInterface
# Terminology in this class:
# Episode: the span of one game life
# Game: an ALE game (e.g. in space invaders == 3 Episodes or 3 Lives)
# Frame: An ALE frame (e.g. 60 fps)
# Step: An Environment step (e.g. covers 4 frames)
#
class AtariEnvironment:
    """Drive an ALE game in multi-frame environment steps.

    The emulator is created with a fixed random seed (123456) and with
    ``repeat_action_probability`` set to 0.0.  Each call to :meth:`step`
    repeats one action for up to ``FRAMES_PER_STEP`` ALE frames and folds the
    resulting screen into the current ``State``.
    """

    # Number of ALE frames a single environment step tries to advance.
    FRAMES_PER_STEP = 4

    def __init__(self, args, outputDir):
        """Create the emulator, load the ROM and start the first game.

        Args:
            args: Options object; ``args.screen_capture_freq`` and
                ``args.rom`` are read.
            outputDir: Base directory under which screen captures are saved.
        """
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        # resetGame() also initialises state, gameScore and episodeStepNumber.
        self.resetGame()

    def getNumActions(self):
        """Return the size of the minimal action set reported by ALE."""
        return len(self.actionSet)

    def getState(self):
        """Return the current state object (updated by step() and resetGame())."""
        return self.state

    def getGameNumber(self):
        """Return the game counter (incremented by resetGame() after a game over)."""
        return self.gameNumber

    def getFrameNumber(self):
        """Return the frame number reported by ALE."""
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        """Return the episode frame number reported by ALE."""
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        """Return the number of step() calls since the last resetGame()."""
        return self.episodeStepNumber

    def getStepNumber(self):
        """Return the number of step() calls since construction."""
        return self.stepNumber

    def getGameScore(self):
        """Return the reward accumulated since the last resetGame()."""
        return self.gameScore

    def isGameOver(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()

    def step(self, action):
        """Repeat ``action`` for up to ``FRAMES_PER_STEP`` frames.

        Args:
            action: Index into the minimal action set.

        Returns:
            A ``(reward, state, isTerminal)`` tuple: the summed reward of the
            frames that were run, the updated state object, and 1 if a life
            was lost or the game ended during this step, otherwise 0.
        """
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for _ in range(self.FRAMES_PER_STEP):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode (life lost or game over) and stop early.
            # NOTE: the original author was unsure whether this is handled
            # correctly with respect to the overall game loop.
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self._shouldCaptureScreen():
                self._captureScreen()

        # Element-wise max of the screens before and after the last frame run.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def _shouldCaptureScreen(self):
        """Return True when frames of the current game should be saved."""
        frequency = self.screenCaptureFrequency
        # A frequency of 0 (or None) disables capture instead of raising
        # ZeroDivisionError in the modulo below.
        if not frequency:
            return False
        return self.gameNumber % frequency == 0

    def _captureScreen(self):
        """Save the current screen as a PNG under the per-game capture directory."""
        captureDir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
        if not os.path.isdir(captureDir):
            os.makedirs(captureDir)
        self.ale.saveScreenPNG(captureDir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))

    def resetGame(self):
        """Reset the emulator and start a fresh state, score and step count.

        The game counter is only advanced when the previous game actually
        ended; a reset after a lost life keeps the same game number.
        """
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber())
        self.gameScore = 0
        # Counts environment steps, not ALE frames; each step advances up to
        # FRAMES_PER_STEP frames, so this is roughly frames / FRAMES_PER_STEP.
        self.episodeStepNumber = 0