-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
75 lines (58 loc) · 2.22 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import irlc
from irlc.ex09.rl_agent import TabularAgent, TabularQ
from irlc.ex09.rl_agent import TabularAgent
from qtrain import qtrain
import gym
from irlc import main_plot
import matplotlib.pyplot as plt
from irlc import savepdf
from irlc.ex11.q_agent import QAgent, RAgent
from ModSnake import *
# Hyperparameters shared by every training run launched below.
# Set total number of episodes
n_episodes = 5000
epsilon = 1 # Exploration rate (starts fully exploratory)
# Epsilon-decay schedule forwarded to qtrain: (enabled, decay factor, episode interval).
# NOTE(review): exact semantics of the tuple depend on qtrain's implementation — confirm there.
decay_epsilon = (True, 1.45, n_episodes // 100)
alpha = 0.1 # Learning Rate
betas = [0.2] # R-learning step sizes to sweep over (used by rsnake)
gammas = [0.9] # Discount Factor values to sweep over (used by qsnake)
max_runs = 10 # independent repetitions per experiment, passed to qtrain
max_steps = 10000000 # hard cap on environment steps per run
grid_sizes = [[10,10],[15,15],[20,20]] # [width, height] of each Snake grid to evaluate
def qsnake(grid_size, gamma):
    """Train a Q-learning agent on a Snake environment of the given grid size.

    Parameters
    ----------
    grid_size : sequence of two ints
        [width, height] of the snake grid.
    gamma : float
        Discount factor for the Q-learning update.

    Returns
    -------
    tuple
        (env, agent): the environment instance and the trained agent.
    """
    # Fix: the second dimension previously reused grid_size[0], so a
    # non-square grid (e.g. 10x15) was logged under "grid10x10".
    q_exp = f"experiments/grid{grid_size[0]}x{grid_size[1]}/q_gamma{gamma}"
    # Make environment instance
    env = Snake_env(grid_size)
    agent = QAgent(env, gamma=gamma, epsilon=epsilon, alpha=alpha)
    stats, trajectories, agent = qtrain(env, agent, q_exp, num_episodes=n_episodes, max_runs=max_runs,
                                        return_agent=True, max_steps=max_steps, decay_epsilon=decay_epsilon)
    # print(stats)
    return env, agent
def rsnake(grid_size, beta):
    """Train an R-learning agent on a Snake environment of the given grid size.

    Parameters
    ----------
    grid_size : sequence of two ints
        [width, height] of the snake grid.
    beta : float
        R-learning step size for the average-reward estimate.

    Returns
    -------
    tuple
        (env, agent): the environment instance and the trained agent.
    """
    # Fix: the second dimension previously reused grid_size[0], so a
    # non-square grid (e.g. 10x15) was logged under "grid10x10".
    q_exp = f"experiments/grid{grid_size[0]}x{grid_size[1]}/r_beta{beta}"
    # Make environment instance
    env = Snake_env(grid_size)
    agent = RAgent(env, alpha=alpha, beta=beta, epsilon=epsilon)
    stats, trajectories, agent = qtrain(env, agent, q_exp, num_episodes=n_episodes, max_runs=max_runs,
                                        return_agent=True, max_steps=max_steps, decay_epsilon=decay_epsilon)
    return env, agent
# Full experiment sweep: for every grid size, repeat five times, training
# one Q-learning agent per discount factor and one R-learning agent per beta.
for size in grid_sizes:
    for repetition in range(5):
        for discount in gammas:
            qsnake(size, discount)
        for step_size in betas:
            rsnake(size, step_size)
# while True:
# observation = env.reset() # Constructs an instance of the game
# snakes_remaining = 1
# while snakes_remaining != 0:
# env.render()
# action = agent.Q.get_optimal_action(observation)
# observation, reward, done, info = env.step(action)
# snakes_remaining = info['snakes_remaining']
# # print('OBS: ' , observation)
# print(observation)
# # print('Reward: ' , reward)
# # print('Done: ' , done)
# # print('Info: ' , info)
#
# env.close()