1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
| import numpy as np
class MazeEnv:
    """A small fixed grid maze with a single agent and a single goal.

    The maze is a 6x5 integer grid in which ``0`` marks a free cell and
    ``1`` marks a wall.  Positions are ``(row, column)`` tuples.  The agent
    starts at ``(0, 0)`` and the episode ends when it reaches ``(5, 4)``.

    Actions are integer codes:

    * ``0`` -- move up (row - 1)
    * ``1`` -- move down (row + 1)
    * ``2`` -- move left (column - 1)
    * ``3`` -- move right (column + 1)
    """

    # Maps each action code to the (row, column) offset it applies.
    _ACTION_DELTAS = {
        0: (-1, 0),
        1: (1, 0),
        2: (0, -1),
        3: (0, 1),
    }

    def __init__(self):
        """Build the maze layout and place the agent on the start cell."""
        self.maze = np.array([
            [0, 0, 0, 0, 0],
            [0, 1, 1, 1, 0],
            [0, 0, 0, 1, 0],
            [1, 1, 0, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 1, 0, 0, 0],
        ])
        self.start_position = (0, 0)
        self.goal_position = (5, 4)
        self.current_position = self.start_position

    def reset(self):
        """Move the agent back to the start cell and return that position."""
        self.current_position = self.start_position
        return self.current_position

    def step(self, action):
        """Apply one action and report the outcome.

        The agent moves one cell in the direction encoded by ``action`` if
        the target cell is inside the grid and is not a wall; otherwise it
        stays where it is.

        Args:
            action: One of the action codes ``0`` (up), ``1`` (down),
                ``2`` (left) or ``3`` (right).

        Returns:
            A ``(position, reward, done)`` tuple: the agent's position after
            the step, ``1`` if that position is the goal and ``-0.1``
            otherwise, and whether the goal has been reached.

        Raises:
            ValueError: If ``action`` is not a known action code.  The
                agent's position is left unchanged in that case.
        """
        # Compare with ``==`` rather than a dict lookup so that any value
        # that compares equal to an action code is accepted, exactly as the
        # original if/elif chain did (including unhashable scalar wrappers).
        delta = next(
            (
                offset
                for code, offset in self._ACTION_DELTAS.items()
                if action == code
            ),
            None,
        )
        if delta is None:
            raise ValueError(
                f"invalid action {action!r}; expected one of "
                f"{sorted(self._ACTION_DELTAS)}"
            )

        next_position = (
            self.current_position[0] + delta[0],
            self.current_position[1] + delta[1],
        )
        if self.is_valid_move(next_position):
            self.current_position = next_position

        done = self.current_position == self.goal_position
        reward = 1 if done else -0.1
        return self.current_position, reward, done

    def is_valid_move(self, position):
        """Return True if ``position`` is inside the grid and not a wall.

        Args:
            position: A ``(row, column)`` pair.
        """
        row, col = position[0], position[1]
        n_rows, n_cols = self.maze.shape[0], self.maze.shape[1]
        # Bounds are checked first so the array is never indexed with a
        # negative or out-of-range coordinate.
        if not (0 <= row < n_rows and 0 <= col < n_cols):
            return False
        return bool(self.maze[row, col] == 0)

    def render(self):
        """Print the maze with the agent shown as 0.5 and the goal as 2.

        The goal marker is written last, so when the agent stands on the
        goal the cell shows ``2``.
        """
        # The maze is an integer array; writing 0.5 into an integer copy
        # would truncate to 0 and hide the agent, so draw on a float copy.
        maze_copy = self.maze.astype(float)
        maze_copy[self.current_position] = 0.5
        maze_copy[self.goal_position] = 2
        print(maze_copy)
|