-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSudoku_env.py
executable file
·128 lines (113 loc) · 5.2 KB
/
Sudoku_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np
from copy import deepcopy
class SudokuEnv:
    """Minimal 4x4 Sudoku environment over a flat, row-major list of 16 cells.

    A cell holding ``0`` is empty. An action is a ``(number, location)``
    tuple: ``number`` is the digit to write and ``location`` is the flat
    cell index (``row * 4 + column``).

    The reward only looks at the row and the column of the cell that was
    just written; the 2x2 box constraint is not checked by this class.
    """

    GRID_SIZE = 4            # cells per row and per column
    CONFLICT_PENALTY = -10   # reward when the written digit repeats in its row/column

    def __init__(self):
        # Initial puzzle, row-major; 0 marks an empty cell.
        self.state = [0, 3, 4, 0, 4, 0, 0, 2, 1, 0, 0, 3, 0, 2, 1, 0]
        # Start the first round.
        self.reset()

    def empty_positions(self, state):
        """Return the flat indices of every cell in *state* that equals 0."""
        return [index for index, value in enumerate(state) if value == 0]

    def get_action(self, state):
        """Draw a random ``(number, location)`` action for *state*.

        The location is sampled from the empty cells of *state* and the
        number from 1..4, both via ``np.random.choice``. Sampling raises
        ``ValueError`` (from NumPy) when *state* has no empty cell.
        """
        possible_locations = self.empty_positions(state)
        loc = np.random.choice(possible_locations)
        number = np.random.choice(range(1, self.GRID_SIZE + 1))
        return (number, loc)

    def transition(self, state, action):
        """Return a copy of *state* with ``action[0]`` written at ``action[1]``.

        *state* itself is left untouched.
        """
        new_state = deepcopy(state)  # never modify the caller's state in place
        number, loc = action[0], action[1]
        new_state[loc] = number
        return new_state

    def reward(self, action, next_state):
        """Score *action* against the grid *next_state* it produced.

        Returns ``CONFLICT_PENALTY`` (-10) when the written number already
        appears in another cell of the same row or the same column of
        *next_state*, otherwise 0.

        Raises:
            ValueError: if the action's location is outside 0..15.
        """
        number, loc = action[0], action[1]
        size = self.GRID_SIZE
        if not 0 <= loc < size * size:
            raise ValueError(
                "action location must be in 0..%d, got %r" % (size * size - 1, loc)
            )

        row, col = divmod(loc, size)
        # Peers exclude the written cell itself: it always holds `number`
        # and would otherwise make every move look like a conflict.
        row_peers = [next_state[row * size + c] for c in range(size) if c != col]
        col_peers = [next_state[r * size + col] for r in range(size) if r != row]

        if number in row_peers or number in col_peers:
            return self.CONFLICT_PENALTY
        return 0

    def step(self, state, action):
        """Apply *action* to *state* and return ``(next_state, reward)``.

        *state* is not modified; ``transition`` works on its own copy.
        """
        next_state = self.transition(state, action)
        return next_state, self.reward(action, next_state)

    def reset(self):
        """Return a fresh copy of the initial puzzle stored in ``self.state``.

        A copy is returned so that a caller editing the grid in place cannot
        alter the puzzle handed out by later resets.
        """
        return deepcopy(self.state)