docs: Update docs and examples related to GymNE (#31)
* docs: Update docs and examples related to GymNE

* Update docs and examples related to GymNE

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Update rl_clipup script

* Use keyword arguments for VecGymNE in Brax example notebook

* Clarify what PicklingLogger does in example codes

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Vojtech Micka <vojtech@nnaisense.com>
3 people authored Oct 24, 2022
1 parent cd7f84b commit a232d04
Showing 8 changed files with 41 additions and 22 deletions.
README.md (7 additions, 2 deletions)
@@ -119,12 +119,12 @@ The following example demonstrates how to solve reinforcement learning tasks tha

```python
from evotorch.algorithms import PGPE
-from evotorch.logging import StdOutLogger
+from evotorch.logging import StdOutLogger, PicklingLogger
from evotorch.neuroevolution import GymNE

# Declare the problem to solve
problem = GymNE(
env_name="Humanoid-v4", # Solve the Humanoid-v4 task
env="Humanoid-v4", # Solve the Humanoid-v4 task
network="Linear(obs_length, act_length)", # Linear policy
observation_normalization=True, # Normalize the policy inputs
decrease_rewards_by=5.0, # Decrease each reward by 5.0
@@ -156,6 +156,11 @@ searcher = PGPE(
# Instantiate a standard output logger
_ = StdOutLogger(searcher)

+# Optional: Instantiate a logger to pickle and save the results periodically.
+# In this example, among the saved results will be the center of the search
+# distribution, since we are using PGPE which is distribution-based.
+_ = PicklingLogger(searcher, interval=10)

# Run the algorithm for the specified amount of generations
searcher.run(500)

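The `PicklingLogger` added in this README example periodically serializes the searcher's results to a pickle file. Below is a minimal sketch of inspecting such a snapshot after a run; the `*.pickle` file-name pattern and the presence of a `"center"` entry for distribution-based algorithms such as PGPE are assumptions to verify against the installed EvoTorch version, not something this diff guarantees.

```python
# Minimal sketch: load the most recent snapshot written by PicklingLogger and
# inspect what it contains. Assumptions (not guaranteed by the diff above):
# snapshots are "*.pickle" files in the working directory, and the unpickled
# object is a dict-like container that may hold a "center" entry when a
# distribution-based algorithm such as PGPE was used.
import glob
import pickle

snapshot_files = sorted(glob.glob("*.pickle"))
if snapshot_files:
    with open(snapshot_files[-1], "rb") as f:
        snapshot = pickle.load(f)
    print(type(snapshot))
    if hasattr(snapshot, "keys"):
        print(list(snapshot.keys()))  # e.g. look for a "center" entry
```
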
docs/advanced_usage/custom_ea.md (1 addition, 1 deletion)
@@ -196,7 +196,7 @@ With our custom EA defined, we're ready to run a simple experiment. For this we
from evotorch.neuroevolution import GymNE

prob = GymNE(
env_name="CartPole-v1",
env="CartPole-v1",
network="""
Linear(obs_length, 16)
>> Tanh()
docs/advanced_usage/dist_based.md (1 addition, 1 deletion)
@@ -97,7 +97,7 @@ from evotorch.logging import StdOutLogger

# Declare the problem to solve
problem = GymNE(
env_name="Humanoid-v4", # Solve the Humanoid-v4 task
env="Humanoid-v4", # Solve the Humanoid-v4 task
network="Linear(obs_length, act_length)", # Linear policy
observation_normalization=True, # Normalize the policy inputs
decrease_rewards_by=5.0, # Decrease each reward by 5.0
docs/user_guide/gym.md (6 additions, 6 deletions)
@@ -36,7 +36,7 @@ from evotorch.neuroevolution import GymNE

problem = GymNE(
# Name of the environment
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
# Linear policy mapping observations to actions
network="Linear(obs_length, act_length)",
# Use 4 available CPUs. Note that you can modify this value,
@@ -68,7 +68,7 @@ class CustomPolicy(torch.nn.Module):


problem = GymNE(
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
network=CustomPolicy,
num_actors=4,
)
@@ -89,7 +89,7 @@ You can specify additional arguments to pass to the instantiation of the environ

```python
problem = GymNE(
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
env_config={
"gravity": -1e-5,
},
@@ -110,7 +110,7 @@ The `num_episodes` argument allows you to evaluate individual networks repeatedl

```python
problem = GymNE(
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
network=CustomPolicy,
num_actors=4,
num_episodes=5,
@@ -129,7 +129,7 @@ While in practice this means that the problem is non-stationary, as the expectio

```python
problem = GymNE(
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
network=CustomPolicy,
num_actors=4,
observation_normalization=True,
@@ -167,7 +167,7 @@ For example, the `"Humanoid-v4"` environment [has an `alive_bonus` value of 5](h

```python
problem = GymNE(
env_name="Humanoid-v4",
env="Humanoid-v4",
network=CustomPolicy,
decrease_rewards_by=5.0,
)
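
Each `gym.md` hunk above demonstrates one `GymNE` keyword argument in isolation (`env`, `env_config`, `num_episodes`, `observation_normalization`, `decrease_rewards_by`). As a combined illustration only, the sketch below gathers those options into a single instantiation; the values are examples, and `decrease_rewards_by` is left out because the documented value of 5.0 is specific to `Humanoid-v4`.

```python
# Illustrative sketch combining the GymNE options discussed in gym.md above.
# Every keyword argument used here appears in the updated documentation; the
# concrete values are examples only.
from evotorch.neuroevolution import GymNE

problem = GymNE(
    env="LunarLanderContinuous-v2",            # environment name (formerly `env_name`)
    network="Linear(obs_length, act_length)",  # linear policy via the string syntax
    env_config={"gravity": -1e-5},             # extra arguments for the environment
    num_actors=4,                              # number of parallel actors
    num_episodes=5,                            # episodes per fitness evaluation
    observation_normalization=True,            # keep running stats of observations
)
```
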
examples/notebooks/Brax_Experiments_with_PGPE.ipynb (2 additions, 2 deletions)
@@ -176,8 +176,8 @@
"outputs": [],
"source": [
"problem = VecGymNE(\n",
" \"brax::humanoid\", # solve the brax task named \"humanoid\"\n",
" policy,\n",
" env=\"brax::humanoid\", # solve the brax task named \"humanoid\"\n",
" network=policy,\n",
" #\n",
" # Collect observation stats, and use those stats to normalize incoming observations\n",
" observation_normalization=True,\n",
examples/notebooks/Gym_Experiments_with_PGPE_and_CoSyNE.ipynb (11 additions, 6 deletions)
@@ -35,16 +35,21 @@
"source": [
"import torch\n",
"from torch import nn\n",
"from evotorch.decorators import pass_info\n",
"\n",
"\n",
"# The decorator `@pass_info` used below tells the problem class `GymNE`\n",
"# to pass information regarding the gym environment via keyword arguments\n",
"# such as `obs_length` and `act_length`.\n",
"@pass_info\n",
"class LinearPolicy(nn.Module):\n",
" \n",
" def __init__(\n",
" self, \n",
" obs_length: int, # Number of observations from the environment\n",
" act_length: int, # Number of actions of the environment\n",
" bias: bool = True, # Whether the policy should use biases\n",
" **kwargs # Anything else that is passed\n",
" ):\n",
" ):\n",
" super().__init__() # Always call super init for nn Modules\n",
" self.linear = nn.Linear(obs_length, act_length, bias = bias)\n",
" \n",
@@ -71,7 +76,7 @@
"from evotorch.neuroevolution import GymNE\n",
"\n",
"problem = GymNE(\n",
" env_name=\"LunarLanderContinuous-v2\", # Name of the environment\n",
" env=\"LunarLanderContinuous-v2\", # Name of the environment\n",
" network=LinearPolicy, # Linear policy that we defined earlier\n",
" network_args = {'bias': False}, # Linear policy should not use biases\n",
" num_actors= 4, # Use 4 available CPUs. Note that you can modify this value, or use 'max' to exploit all available CPUs\n",
@@ -189,7 +194,7 @@
"outputs": [],
"source": [
"problem = GymNE(\n",
" env_name=\"LunarLanderContinuous-v2\",\n",
" env=\"LunarLanderContinuous-v2\",\n",
" network=LinearPolicy,\n",
" network_args = {'bias': False},\n",
" num_actors= 4, \n",
@@ -250,7 +255,7 @@
"id": "3dcb5243",
"metadata": {},
"source": [
"And once again we can visualize the learned policy. As `CoSyNE` is population based, it does not maintain a 'best estimate' of a good policy. Instead, we simply take the best performing solution from the current population. "
"And once again we can visualize the learned policy. As `Cosyne` is population based, it does not maintain a 'best estimate' of a good policy. Instead, we simply take the best performing solution from the current population. "
]
},
{
@@ -296,7 +301,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.7.13"
}
},
"nbformat": 4,
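
On the sentence above about `Cosyne` taking the best performing solution from the current population: a minimal sketch of that step is shown below. It reuses the notebook's `problem` (a `GymNE` instance) and its `Cosyne` searcher, and it assumes the searcher's status dictionary exposes the population best under `"pop_best"` and that `GymNE` provides `to_policy()`; both names should be verified against the installed EvoTorch version.

```python
# Sketch: after running a population-based searcher such as Cosyne, take the
# best solution of the current population and turn it into a policy network.
# Assumptions: `searcher` and `problem` are the notebook's Cosyne searcher and
# GymNE problem already in scope, the status dictionary has a "pop_best" entry,
# and GymNE provides to_policy(); check these against your EvoTorch version.
best_solution = searcher.status["pop_best"]
policy = problem.to_policy(best_solution)  # may need best_solution.values instead
print(policy)
```
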
examples/scripts/rl_clipup.py (2 additions, 2 deletions)
@@ -225,7 +225,7 @@ def main(_config: dict):

# Instantiate the problem class
problem = GymNE(
-env_name=env_name,
+env=env_name,
network=_config["policy"],
observation_normalization=_config["observation_normalization"],
decrease_rewards_by=none_if_nan(_config["decrease_rewards_by"]),
@@ -268,7 +268,7 @@ def main(_config: dict):
# Create a test problem instance -- note the difference in configuration

test_problem = GymNE(
-env_name=actual_env_name, # Using the actual environment name, rather than a modified version
+env=actual_env_name, # Using the actual environment name, rather than a modified version
network=_config["policy"],
observation_normalization=_config["observation_normalization"],
decrease_rewards_by=0.0, # Not changing the rewards
examples/scripts/rl_gym.py (11 additions, 2 deletions)
@@ -13,12 +13,12 @@
# limitations under the License.

from evotorch.algorithms import PGPE
-from evotorch.logging import StdOutLogger
+from evotorch.logging import PicklingLogger, StdOutLogger
from evotorch.neuroevolution import GymNE

# Specialized Problem class for Gym environments
problem = GymNE(
env_name="Humanoid-v4",
env="Humanoid-v4",
# Linear policy defined using special string syntax supported by EvoTorch
network="Linear(obs_length, act_length)",
observation_normalization=True,
@@ -40,7 +40,16 @@
num_interactions=150000,
popsize_max=3200,
)

# Instantiate a logger that will print the progress to the standard output
logger = StdOutLogger(searcher)

+# Optional:
+# Instantiate a logger that will, at every 10 generations, pickle and save the results (where the results will include
+# the center of the search distribution since we are using PGPE which is a distribution-based search algorithm).
+pickler = PicklingLogger(searcher, interval=10)

# Run the search algorithm
searcher.run(500)

# Create a policy to test using the final center of the optimized distribution and visualize its behavior
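
The remainder of `rl_gym.py` is collapsed in this view. For orientation only, here is a sketch of what the step described by the comment above can look like; it is not the script's collapsed code, and it assumes that PGPE's status dictionary exposes the distribution center under `"center"` and that `GymNE` provides `to_policy()`.

```python
# Sketch (not the collapsed portion of rl_gym.py): build a policy module from
# the final center of PGPE's search distribution.
# Assumptions: `searcher` and `problem` are the PGPE searcher and GymNE problem
# defined earlier in the script, the status dictionary has a "center" entry,
# and GymNE provides to_policy(); verify against your EvoTorch version.
center = searcher.status["center"]
policy = problem.to_policy(center)
print(policy)  # a torch.nn.Module ready to be rolled out in the environment
```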
