-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenv.py
More file actions
96 lines (88 loc) · 3.16 KB
/
env.py
File metadata and controls
96 lines (88 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
class InvalidMoveException(Exception):
    """Exception type for an invalid move (as its name suggests); adds nothing to ``Exception``."""
class GridWorld:
    """
    Responsible for the game.

    Walls represent cells of the grid we cannot enter. A transition to these can't happen.
    Pitfalls represent cells of the grid that will end the game if reached, with a negative reward.
    The goal is the only cell returning a positive reward; reaching it also ends the game.

    Cells are addressed as ``(row, col)``: "up"/"down" change the first
    coordinate and "left"/"right" change the second one.
    """

    def __init__(
        self,
        walls: np.ndarray,
        pitfalls: np.ndarray,
        size: tuple = (9, 9),
        start: tuple = (0, 0),
        goal: tuple = (8, 8),
    ):
        """
        Inits a new game.

        :param walls: the walls of the game, an array (or any iterable) of (row, col) cells
        :param pitfalls: the pitfalls of the game, an array (or any iterable) of (row, col) cells
        :param size: the size of the grid
        :param start: the starting position
        :param goal: the goal position
        :raises AssertionError: if start or goal coincides with a wall or a pitfall
            (asserts are skipped when Python runs with ``-O``)
        """
        # Cells are compared as whole tuples. The ``in`` operator of a NumPy
        # array would report a match as soon as a single coordinate is equal.
        assert not self._has_cell(pitfalls, goal), "Goal cannot be a pitfall"
        assert not self._has_cell(pitfalls, start), "Start cannot be a pitfall"
        assert not self._has_cell(walls, goal), "Goal cannot be a wall"
        assert not self._has_cell(walls, start), "Start cannot be a wall"
        self.walls = walls
        self.pitfalls = pitfalls
        self.size = size
        self.start = start
        self.goal = goal
        self.reset()

    @staticmethod
    def _has_cell(cells, position) -> bool:
        """
        Tells whether a position is one of the given cells.

        :param cells: iterable of cells, each one a sequence of coordinates
        :param position: the position to look for
        :return: True if some cell has exactly the same coordinates as position
        """
        wanted = tuple(position)
        return any(tuple(cell) == wanted for cell in cells)

    def reset(self):
        """
        Resets the game: the player goes back to the start and the game is not done.
        """
        # A list copy, so that moving the player never mutates ``self.start``.
        self.player_position = list(self.start)
        self.done = False

    def step(self, move: str):
        """
        Steps the agent in a direction.

        :param move: the direction to move: ["up", "down", "left", "right"]
        :return: a ``(position, reward, done)`` tuple. The reward is 50 when the
            move reaches the goal, -50 when it reaches a pitfall and 0 otherwise.
            The player is only relocated by a regular move: when the move hits
            the goal, a pitfall, a wall or the border of the grid, the returned
            position is the one the player had before the move.
        :raises AssertionError: if move is not one of the four directions
        """
        assert move in ["up", "down", "left", "right"], "Invalid move"
        target = self.player_position[:]
        if move == "up" and self.player_position[0] > 0:
            target[0] -= 1
        elif move == "down" and self.player_position[0] < self.size[0] - 1:
            target[0] += 1
        elif move == "left" and self.player_position[1] > 0:
            target[1] -= 1
        elif move == "right" and self.player_position[1] < self.size[1] - 1:
            target[1] += 1
        else:
            # Moving off the grid is a no-op rather than an error.
            return tuple(self.player_position), 0, False

        cell = tuple(target)
        if cell == tuple(self.goal):
            self.done = True
            return tuple(self.player_position), 50, True
        if self._has_cell(self.pitfalls, cell):
            self.done = True
            return tuple(self.player_position), -50, True
        if self._has_cell(self.walls, cell):
            # Walls block the move; the player stays where it is.
            return tuple(self.player_position), 0, False
        self.player_position = target
        return tuple(self.player_position), 0, False

    def __repr__(self):
        """
        Represents the game status graphically.

        :return: "Game over" once the game is done, otherwise a printout of the
            grid showing walls, pitfalls, the goal and the player
        """
        if self.done:
            return "Game over"
        nice_repr = np.full(self.size, "🏝")
        # Index with tuples: indexing with an array row would select whole
        # rows of the grid instead of a single cell.
        for wall in self.walls:
            nice_repr[tuple(wall)] = "🚧"
        for pit in self.pitfalls:
            nice_repr[tuple(pit)] = "🕳"
        nice_repr[tuple(self.goal)] = "🏆"
        nice_repr[tuple(self.player_position)] = "🐒"
        return f"GridWorld:\n{nice_repr}"