-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_fix.py
102 lines (76 loc) · 3.16 KB
/
test_fix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import numpy as np
import gym
import random
import sys
import cvxpy as cp
# Number of bins per observation dimension. The idx_to_state* helpers divide
# each dimension's range by this value and combine the two bin numbers into a
# single flat index (position_idx + velocity_idx * N_idx).
N_idx = 20
# NOTE(review): not referenced by any code visible in this file; presumably a
# feature count shared with the training script -- confirm or remove.
F_idx = 4
# NOTE(review): not referenced by any code visible in this file; presumably the
# discount factor used during training -- confirm or remove.
GAMMA = 0.99
def idx_to_state2(env, state, n_bins=None):
    """Map the first row of a batched observation to a flat table index.

    Each of the two observation components (position, velocity) is cut into
    ``n_bins`` equal-width bins spanning the bounds reported by
    ``env.observation_space``. The flat index is
    ``position_idx + velocity_idx * n_bins``.

    Args:
        env: Object exposing ``observation_space.low`` and
            ``observation_space.high`` with at least two entries each.
        state: 2-D array-like; only row 0 is read (``state[0, 0]`` is the
            position, ``state[0, 1]`` the velocity).
        n_bins: Number of bins per dimension. Defaults to the module-level
            ``N_idx``.

    Returns:
        int: Flat index in ``[0, n_bins**2 - 1]``.
    """
    if n_bins is None:
        n_bins = N_idx

    env_low = np.asarray(env.observation_space.low)
    env_high = np.asarray(env.observation_space.high)
    bin_width = (env_high - env_low) / n_bins
    state = np.asarray(state)

    position_idx = int((state[0, 0] - env_low[0]) / bin_width[0])
    velocity_idx = int((state[0, 1] - env_low[1]) / bin_width[1])

    # A value sitting exactly on the upper bound divides out to ``n_bins``,
    # one past the last valid bin. Unclamped, a position overflow silently
    # aliases into the next velocity row and a velocity overflow produces an
    # index >= n_bins**2. Clamp both ends so the result is always valid.
    position_idx = min(max(position_idx, 0), n_bins - 1)
    velocity_idx = min(max(velocity_idx, 0), n_bins - 1)

    return position_idx + velocity_idx * n_bins
def idx_to_state0(env, state, n_bins=None):
    """Map a single (position, velocity) observation to a flat table index.

    Each component is cut into ``n_bins`` equal-width bins spanning the
    bounds reported by ``env.observation_space``. The flat index is
    ``position_idx + velocity_idx * n_bins``.

    Args:
        env: Object exposing ``observation_space.low`` and
            ``observation_space.high`` with at least two entries each.
        state: Sequence whose element 0 is the position and element 1 the
            velocity.
        n_bins: Number of bins per dimension. Defaults to the module-level
            ``N_idx``.

    Returns:
        int: Flat index in ``[0, n_bins**2 - 1]``.
    """
    if n_bins is None:
        n_bins = N_idx

    env_low = np.asarray(env.observation_space.low)
    env_high = np.asarray(env.observation_space.high)
    bin_width = (env_high - env_low) / n_bins

    position_idx = int((state[0] - env_low[0]) / bin_width[0])
    velocity_idx = int((state[1] - env_low[1]) / bin_width[1])

    # A value exactly on the upper bound divides out to ``n_bins``, one past
    # the last valid bin; clamp so the flat index can never alias into a
    # neighbouring row or run past the end of the table.
    position_idx = min(max(position_idx, 0), n_bins - 1)
    velocity_idx = min(max(velocity_idx, 0), n_bins - 1)

    return position_idx + velocity_idx * n_bins
def idx_to_state(env, state, n_bins=None):
    """Discretize one observation, or a batch of them, into flat table indices.

    Each of the two observation components (position, velocity) is cut into
    ``n_bins`` equal-width bins spanning the bounds reported by
    ``env.observation_space``. The flat index is
    ``position_idx + velocity_idx * n_bins``.

    Both input shapes use the same rule: floor to a bin, then clamp to
    ``[0, n_bins - 1]``. A single observation therefore maps to the same
    cell whether it is passed on its own or as a row of a batch, and a value
    exactly on the upper bound cannot produce an out-of-range index.

    Args:
        env: Object exposing ``observation_space.low`` and
            ``observation_space.high`` with at least two entries each.
        state: Array-like of shape ``(2,)`` for one observation or
            ``(batch, 2)`` for several.
        n_bins: Number of bins per dimension. Defaults to the module-level
            ``N_idx``.

    Returns:
        int for a 1-D ``state``; an integer ndarray of shape ``(batch,)`` for
        a 2-D ``state``. Every index lies in ``[0, n_bins**2 - 1]``.

    Raises:
        ValueError: If ``state`` is neither 1-D nor 2-D.
    """
    if n_bins is None:
        n_bins = N_idx

    env_low = np.asarray(env.observation_space.low)
    env_high = np.asarray(env.observation_space.high)
    bin_width = (env_high - env_low) / n_bins

    state = np.asarray(state)
    if state.ndim not in (1, 2):
        raise ValueError("Unsupported state dimensions")

    # ``state[..., k]`` selects component k for both the single-observation
    # and the batched layout, so one code path serves both shapes.
    position_bin = np.floor((state[..., 0] - env_low[0]) / bin_width[0])
    velocity_bin = np.floor((state[..., 1] - env_low[1]) / bin_width[1])

    # Clamp before converting to int so out-of-range values are pinned to the
    # nearest valid bin instead of aliasing into another row or overrunning
    # the table.
    position_idx = np.clip(position_bin, 0, n_bins - 1).astype(int)
    velocity_idx = np.clip(velocity_bin, 0, n_bins - 1).astype(int)

    state_idx = position_idx + velocity_idx * n_bins
    return int(state_idx) if state.ndim == 1 else state_idx
def _run_episode(env, q_table):
    """Play one episode greedily with respect to ``q_table``; return its score.

    Args:
        env: Environment providing ``reset()``, ``step(action)`` and
            ``render()``.
        q_table: Array indexed by the flat state index returned by
            ``idx_to_state``; the argmax of a row is the action taken.

    Returns:
        The sum of the rewards collected until the episode ends.
    """
    reset_result = env.reset()
    # reset() returns ``(observation, info)`` in newer gym releases and the
    # bare observation in older ones; accept both.
    state = reset_result[0] if isinstance(reset_result, tuple) else reset_result

    score = 0
    while True:
        # Render the play
        env.render()

        action = int(np.argmax(q_table[idx_to_state(env, state)]))

        step_result = env.step(action)
        if len(step_result) == 5:
            # Newer API: (obs, reward, terminated, truncated, info). The
            # episode is over on either flag; ignoring ``truncated`` would
            # keep this loop running after the time limit is hit.
            state, reward, terminated, truncated, _ = step_result
            done = terminated or truncated
        else:
            # Older API: (obs, reward, done, info).
            state, reward, done, _ = step_result

        score += reward
        if done:
            return score


if __name__ == '__main__':
    print(":: Testing APP-learning.\n")

    # Load the agent. The table is expected to hold N_idx**2 state rows
    # (20 position bins x 20 velocity bins) with 3 action values per row.
    q_table = np.load(file="results/app_q_table.npy")

    # Create a new game instance.
    env = gym.make('MountainCar-v0', render_mode='human')

    n_episode = 10  # test the agent 10 times
    scores = []
    try:
        for ep in range(n_episode):
            score = _run_episode(env, q_table)
            scores.append(score)
            print('{} episode | score: {:.1f}'.format(ep + 1, score))
    finally:
        # Release the render window even if an episode raises.
        env.close()

    if scores:
        print('average score over {} episodes: {:.1f}'.format(
            len(scores), float(np.mean(scores))))
    sys.exit()