Update TD3/DDPG defaults and upgrade to MuJoCo v4 envs (#430)
* Update TD3/DDPG defaults and upgrade to MuJoCo v4 envs

* Update SB3 version
araffin authored Jan 16, 2024
1 parent 28dc228 commit 8cecab4
Showing 14 changed files with 132 additions and 116 deletions.
CHANGELOG.md: 18 additions & 0 deletions
@@ -1,3 +1,21 @@
+## Release 2.3.0a1 (WIP)
+
+### Breaking Changes
+- Updated default hyperparameters for TD3/DDPG to be more consistent with SAC
+- Upgraded MuJoCo env hyperparameters to v4 (pre-trained agents need to be updated)
+- Upgraded to SB3 >= 2.3.0
+
+### New Features
+
+
+### Bug fixes
+
+### Documentation
+
+### Other
+
+
+
## Release 2.2.1 (2023-11-17)

### Breaking Changes
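For context, here is a minimal sketch of what the updated DDPG/TD3 defaults look like when written directly against the Stable-Baselines3 API. The hyperparameter values mirror the `hyperparams/ddpg.yml` diff below; the environment choice and the surrounding setup are illustrative assumptions, not part of the commit:

```python
# Sketch only: the updated defaults expressed as direct SB3 calls.
# Values mirror hyperparams/ddpg.yml below; everything else is assumed.
import gymnasium as gym
import numpy as np
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise

env = gym.make("HalfCheetah-v4")  # MuJoCo v4 env, as upgraded in this commit
n_actions = env.action_space.shape[0]

model = DDPG(
    "MlpPolicy",
    env,
    learning_starts=10_000,
    action_noise=NormalActionNoise(  # noise_type: 'normal', noise_std: 0.1
        mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)
    ),
    train_freq=1,      # replaces train_freq: [1, "episode"]
    gradient_steps=1,  # replaces gradient_steps: -1 (one update per env step, SAC-style)
    learning_rate=1e-3,
    batch_size=256,    # SAC-style batch size
    policy_kwargs=dict(net_arch=[400, 300]),
)
model.learn(total_timesteps=1_000_000)
```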
hyperparams/a2c.yml: 6 additions & 6 deletions
@@ -165,24 +165,24 @@ ReacherBulletEnv-v0:

# === Mujoco Envs ===

-HalfCheetah-v3: &mujoco-defaults
+HalfCheetah-v4: &mujoco-defaults
normalize: true
n_timesteps: !!float 1e6
policy: 'MlpPolicy'

-Ant-v3:
+Ant-v4:
<<: *mujoco-defaults

-Hopper-v3:
+Hopper-v4:
<<: *mujoco-defaults

-Walker2d-v3:
+Walker2d-v4:
<<: *mujoco-defaults

-Humanoid-v3:
+Humanoid-v4:
<<: *mujoco-defaults
n_timesteps: !!float 2e6

-Swimmer-v3:
+Swimmer-v4:
<<: *mujoco-defaults
gamma: 0.9999
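These hyperparameter files rely on YAML anchors and merge keys: `&mujoco-defaults` names a block and `<<: *mujoco-defaults` copies it into the entries that follow, which is why each env upgrade touches one anchor line plus one alias line per file. A self-contained sketch of how the pattern resolves (trimmed-down values, using PyYAML; not the repo's actual loading code):

```python
# How the `&mujoco-defaults` anchor and `<<:` merge key resolve.
# Trimmed-down config for illustration; not the actual repo file.
import yaml  # PyYAML; its SafeLoader supports YAML merge keys

config = yaml.safe_load("""
HalfCheetah-v4: &mujoco-defaults
  normalize: true
  n_timesteps: 1.0e+6
  policy: MlpPolicy

Humanoid-v4:
  <<: *mujoco-defaults  # inherit every key from the anchor...
  n_timesteps: 2.0e+6   # ...then override just one
""")

print(config["Humanoid-v4"])
# {'normalize': True, 'n_timesteps': 2000000.0, 'policy': 'MlpPolicy'}
```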
hyperparams/ars.yml: 6 additions & 6 deletions
@@ -108,7 +108,7 @@ ReacherBulletEnv-v0:

# === Mujoco Envs ===
# Params closest to original paper
-Swimmer-v3:
+Swimmer-v4:
n_envs: 1
policy: 'LinearPolicy'
n_timesteps: !!float 2e6
@@ -119,7 +119,7 @@ Swimmer-v3:
alive_bonus_offset: 0
# normalize: "dict(norm_obs=True, norm_reward=False)"

-Hopper-v3:
+Hopper-v4:
n_envs: 1
policy: 'LinearPolicy'
n_timesteps: !!float 7e6
@@ -130,7 +130,7 @@ Hopper-v3:
alive_bonus_offset: -1
normalize: "dict(norm_obs=True, norm_reward=False)"

-HalfCheetah-v3:
+HalfCheetah-v4:
n_envs: 1
policy: 'LinearPolicy'
n_timesteps: !!float 1.25e7
@@ -141,7 +141,7 @@ HalfCheetah-v3:
alive_bonus_offset: 0
normalize: "dict(norm_obs=True, norm_reward=False)"

-Walker2d-v3:
+Walker2d-v4:
n_envs: 1
policy: 'LinearPolicy'
n_timesteps: !!float 7.5e7
@@ -152,7 +152,7 @@ Walker2d-v3:
alive_bonus_offset: -1
normalize: "dict(norm_obs=True, norm_reward=False)"

-Ant-v3:
+Ant-v4:
n_envs: 1
policy: 'LinearPolicy'
n_timesteps: !!float 7.5e7
@@ -164,7 +164,7 @@ Ant-v3:
normalize: "dict(norm_obs=True, norm_reward=False)"


-Humanoid-v3:
+Humanoid-v4:
n_envs: 1
policy: 'LinearPolicy'
n_timesteps: !!float 2.5e8
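The `normalize: "dict(norm_obs=True, norm_reward=False)"` entries above are keyword arguments for SB3's VecNormalize wrapper: observations are standardized with running statistics while rewards are passed through unchanged. A minimal sketch of the equivalent direct call (the env choice is assumed; the zoo constructs and checkpoints this wrapper for you):

```python
# What `normalize: "dict(norm_obs=True, norm_reward=False)"` amounts to:
# a VecNormalize wrapper that standardizes observations but not rewards.
# Minimal sketch; the zoo builds and saves this wrapper itself.
import gymnasium as gym
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

venv = DummyVecEnv([lambda: gym.make("Hopper-v4")])
venv = VecNormalize(venv, norm_obs=True, norm_reward=False)
```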
hyperparams/ddpg.yml: 29 additions & 29 deletions
@@ -4,6 +4,11 @@ MountainCarContinuous-v0:
policy: 'MlpPolicy'
noise_type: 'ornstein-uhlenbeck'
noise_std: 0.5
+gradient_steps: 1
+train_freq: 1
+learning_rate: !!float 1e-3
+batch_size: 256
+policy_kwargs: "dict(net_arch=[400, 300])"

Pendulum-v1:
n_timesteps: 20000
@@ -13,8 +18,8 @@ Pendulum-v1:
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
-gradient_steps: -1
-train_freq: [1, "episode"]
+gradient_steps: 1
+train_freq: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"

@@ -26,8 +31,8 @@ LunarLanderContinuous-v2:
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
-gradient_steps: -1
-train_freq: [1, "episode"]
+gradient_steps: 1
+train_freq: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"

@@ -39,23 +44,23 @@ BipedalWalker-v3:
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
-gradient_steps: -1
-train_freq: [1, "episode"]
+gradient_steps: 1
+train_freq: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"

# To be tuned
BipedalWalkerHardcore-v3:
n_timesteps: !!float 1e7
policy: 'MlpPolicy'
-gamma: 0.98
-buffer_size: 200000
+gamma: 0.99
+buffer_size: 1000000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
-gradient_steps: -1
-train_freq: [1, "episode"]
-learning_rate: !!float 1e-3
+batch_size: 256
+train_freq: 1
+learning_rate: lin_7e-4
policy_kwargs: "dict(net_arch=[400, 300])"

# Tuned
@@ -69,28 +74,21 @@ HalfCheetahBulletEnv-v0: &pybullet-defaults
noise_std: 0.1
gradient_steps: 1
train_freq: 1
-learning_rate: !!float 1e-3
+batch_size: 256
+learning_rate: !!float 7e-4
policy_kwargs: "dict(net_arch=[400, 300])"

# Tuned
AntBulletEnv-v0:
<<: *pybullet-defaults
-learning_rate: !!float 7e-4
-policy_kwargs: "dict(net_arch=[400, 300])"

# Tuned
HopperBulletEnv-v0:
<<: *pybullet-defaults
-train_freq: 64
-gradient_steps: 64
-batch_size: 256
-learning_rate: !!float 7e-4

# Tuned
Walker2DBulletEnv-v0:
<<: *pybullet-defaults
-batch_size: 256
-learning_rate: !!float 7e-4

# TO BE tested
HumanoidBulletEnv-v0:
@@ -123,29 +121,31 @@ InvertedPendulumSwingupBulletEnv-v0:
n_timesteps: !!float 3e5

# === Mujoco Envs ===
-
-HalfCheetah-v3: &mujoco-defaults
+HalfCheetah-v4: &mujoco-defaults
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
+train_freq: 1
+gradient_steps: 1
+learning_rate: !!float 1e-3
+batch_size: 256
+policy_kwargs: "dict(net_arch=[400, 300])"

-Ant-v3:
+Ant-v4:
<<: *mujoco-defaults

-Hopper-v3:
+Hopper-v4:
<<: *mujoco-defaults

-Walker2d-v3:
+Walker2d-v4:
<<: *mujoco-defaults

-Humanoid-v3:
+Humanoid-v4:
<<: *mujoco-defaults
n_timesteps: !!float 2e6

-Swimmer-v3:
+Swimmer-v4:
<<: *mujoco-defaults
gamma: 0.9999
-train_freq: 1
-gradient_steps: 1
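One value in the diff above deserves a note: `learning_rate: lin_7e-4` (in the BipedalWalkerHardcore-v3 entry) is the zoo's shorthand for a learning rate that decays linearly from 7e-4 to 0 over training. In plain SB3 this corresponds to passing a callable of the remaining progress; a minimal sketch, where the helper name is illustrative rather than the zoo's actual export:

```python
# The `lin_7e-4` shorthand: a learning rate decaying linearly to zero.
# SB3 accepts a callable mapping progress_remaining (1.0 at the start of
# training, 0.0 at the end) to the current rate. Helper name is illustrative.
from typing import Callable


def linear_schedule(initial_value: float) -> Callable[[float], float]:
    def schedule(progress_remaining: float) -> float:
        return progress_remaining * initial_value

    return schedule


lr = linear_schedule(7e-4)
print(lr(1.0), lr(0.5), lr(0.0))  # 0.0007 0.00035 0.0
```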
hyperparams/ppo.yml: 11 additions & 11 deletions
@@ -380,28 +380,28 @@ CarRacing-v2:


# === Mujoco Envs ===
-# HalfCheetah-v3: &mujoco-defaults
+# HalfCheetah-v4: &mujoco-defaults
# normalize: true
# n_timesteps: !!float 1e6
# policy: 'MlpPolicy'

-Ant-v3: &mujoco-defaults
+Ant-v4: &mujoco-defaults
normalize: true
n_timesteps: !!float 1e6
policy: 'MlpPolicy'

-# Hopper-v3:
+# Hopper-v4:
# <<: *mujoco-defaults
#
-# Walker2d-v3:
+# Walker2d-v4:
# <<: *mujoco-defaults
#
-# Humanoid-v3:
+# Humanoid-v4:
# <<: *mujoco-defaults
# n_timesteps: !!float 2e6
#
# tuned
-Swimmer-v3:
+Swimmer-v4:
<<: *mujoco-defaults
gamma: 0.9999
n_envs: 4
@@ -413,7 +413,7 @@ Swimmer-v3:
# Tuned
# 10 mujoco envs

-HalfCheetah-v3:
+HalfCheetah-v4:
normalize: true
n_envs: 1
policy: 'MlpPolicy'
@@ -435,7 +435,7 @@ HalfCheetah-v3:
net_arch=dict(pi=[256, 256], vf=[256, 256])
)"

-# Ant-v3:
+# Ant-v4:
# normalize: true
# n_envs: 1
# policy: 'MlpPolicy'
@@ -451,7 +451,7 @@ HalfCheetah-v3:
# max_grad_norm: 0.6
# vf_coef: 0.677239

-Hopper-v3:
+Hopper-v4:
normalize: true
n_envs: 1
policy: 'MlpPolicy'
@@ -495,7 +495,7 @@ HumanoidStandup-v2:
net_arch=dict(pi=[256, 256], vf=[256, 256])
)"

-Humanoid-v3:
+Humanoid-v4:
normalize: true
n_envs: 1
policy: 'MlpPolicy'
@@ -565,7 +565,7 @@ Reacher-v2:
max_grad_norm: 0.9
vf_coef: 0.950368

-Walker2d-v3:
+Walker2d-v4:
normalize: true
n_envs: 1
policy: 'MlpPolicy'
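The tuned MuJoCo PPO entries above pass `net_arch=dict(pi=[256, 256], vf=[256, 256])` through `policy_kwargs`, i.e. separate two-layer 256-unit networks for the policy and the value function. A minimal sketch of the same pattern as a direct SB3 call (env choice assumed; the YAML's full `policy_kwargs` blocks contain more entries than are visible in this diff):

```python
# Separate policy (pi) and value (vf) networks via policy_kwargs,
# as in the tuned MuJoCo PPO configs above. Sketch only; the zoo's
# full policy_kwargs blocks include additional entries.
import gymnasium as gym
from stable_baselines3 import PPO

model = PPO(
    "MlpPolicy",
    gym.make("HalfCheetah-v4"),
    policy_kwargs=dict(net_arch=dict(pi=[256, 256], vf=[256, 256])),
)
```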
(Diffs for the remaining 9 changed files are not shown.)
