docs: Update docs and examples related to GymNE (#31)
* docs: Update docs and examples related to GymNE

* Update docs and examples related to GymNE

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Update rl_clipup script

* Use keyword arguments for VecGymNE in Brax example notebook

* Clarify what PicklingLogger does in example codes

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Vojtech Micka <vojtech@nnaisense.com>
3 people authored Oct 24, 2022
1 parent cd7f84b commit a232d04
Showing 8 changed files with 41 additions and 22 deletions.
README.md (7 additions, 2 deletions)
@@ -119,12 +119,12 @@ The following example demonstrates how to solve reinforcement learning tasks tha

```python
from evotorch.algorithms import PGPE
-from evotorch.logging import StdOutLogger
+from evotorch.logging import StdOutLogger, PicklingLogger
from evotorch.neuroevolution import GymNE

# Declare the problem to solve
problem = GymNE(
env_name="Humanoid-v4", # Solve the Humanoid-v4 task
env="Humanoid-v4", # Solve the Humanoid-v4 task
network="Linear(obs_length, act_length)", # Linear policy
observation_normalization=True, # Normalize the policy inputs
decrease_rewards_by=5.0, # Decrease each reward by 5.0
@@ -156,6 +156,11 @@ searcher = PGPE(
# Instantiate a standard output logger
_ = StdOutLogger(searcher)

+# Optional: Instantiate a logger to pickle and save the results periodically.
+# In this example, among the saved results will be the center of the search
+# distribution, since we are using PGPE which is distribution-based.
+_ = PicklingLogger(searcher, interval=10)

# Run the algorithm for the specified amount of generations
searcher.run(500)

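The `PicklingLogger` added in this README example periodically serializes the searcher's results to a pickle file. Below is a minimal sketch of inspecting such a snapshot after a run; the `*.pickle` file-name pattern and the presence of a `"center"` entry for distribution-based algorithms such as PGPE are assumptions to verify against the installed EvoTorch version, not something this diff guarantees.

```python
# Minimal sketch: load the most recent snapshot written by PicklingLogger and
# inspect what it contains. Assumptions (not guaranteed by the diff above):
# snapshots are "*.pickle" files in the working directory, and the unpickled
# object is a dict-like container that may hold a "center" entry when a
# distribution-based algorithm such as PGPE was used.
import glob
import pickle

snapshot_files = sorted(glob.glob("*.pickle"))
if snapshot_files:
    with open(snapshot_files[-1], "rb") as f:
        snapshot = pickle.load(f)
    print(type(snapshot))
    if hasattr(snapshot, "keys"):
        print(list(snapshot.keys()))  # e.g. look for a "center" entry
```
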
docs/advanced_usage/custom_ea.md (1 addition, 1 deletion)
@@ -196,7 +196,7 @@ With our custom EA defined, we're ready to run a simple experiment. For this we
from evotorch.neuroevolution import GymNE

prob = GymNE(
env_name="CartPole-v1",
env="CartPole-v1",
network="""
Linear(obs_length, 16)
>> Tanh()
docs/advanced_usage/dist_based.md (1 addition, 1 deletion)
@@ -97,7 +97,7 @@ from evotorch.logging import StdOutLogger

# Declare the problem to solve
problem = GymNE(
env_name="Humanoid-v4", # Solve the Humanoid-v4 task
env="Humanoid-v4", # Solve the Humanoid-v4 task
network="Linear(obs_length, act_length)", # Linear policy
observation_normalization=True, # Normalize the policy inputs
decrease_rewards_by=5.0, # Decrease each reward by 5.0
docs/user_guide/gym.md (6 additions, 6 deletions)
@@ -36,7 +36,7 @@ from evotorch.neuroevolution import GymNE

problem = GymNE(
# Name of the environment
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
# Linear policy mapping observations to actions
network="Linear(obs_length, act_length)",
# Use 4 available CPUs. Note that you can modify this value,
@@ -68,7 +68,7 @@ class CustomPolicy(torch.nn.Module):


problem = GymNE(
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
network=CustomPolicy,
num_actors=4,
)
@@ -89,7 +89,7 @@ You can specify additional arguments to pass to the instantiation of the environ

```python
problem = GymNE(
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
env_config={
"gravity": -1e-5,
},
@@ -110,7 +110,7 @@ The `num_episodes` argument allows you to evaluate individual networks repeatedl

```python
problem = GymNE(
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
network=CustomPolicy,
num_actors=4,
num_episodes=5,
@@ -129,7 +129,7 @@ While in practice this means that the problem is non-stationary, as the expectio

```python
problem = GymNE(
env_name="LunarLanderContinuous-v2",
env="LunarLanderContinuous-v2",
network=CustomPolicy,
num_actors=4,
observation_normalization=True,
@@ -167,7 +167,7 @@ For example, the `"Humanoid-v4"` environment [has an `alive_bonus` value of 5](h

```python
problem = GymNE(
env_name="Humanoid-v4",
env="Humanoid-v4",
network=CustomPolicy,
decrease_rewards_by=5.0,
)
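
Each `gym.md` hunk above demonstrates one `GymNE` keyword argument in isolation (`env`, `env_config`, `num_episodes`, `observation_normalization`, `decrease_rewards_by`). As a combined illustration only, the sketch below gathers those options into a single instantiation; the values are examples, and `decrease_rewards_by` is left out because the documented value of 5.0 is specific to `Humanoid-v4`.

```python
# Illustrative sketch combining the GymNE options discussed in gym.md above.
# Every keyword argument used here appears in the updated documentation; the
# concrete values are examples only.
from evotorch.neuroevolution import GymNE

problem = GymNE(
    env="LunarLanderContinuous-v2",            # environment name (formerly `env_name`)
    network="Linear(obs_length, act_length)",  # linear policy via the string syntax
    env_config={"gravity": -1e-5},             # extra arguments for the environment
    num_actors=4,                              # number of parallel actors
    num_episodes=5,                            # episodes per fitness evaluation
    observation_normalization=True,            # keep running stats of observations
)
```
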
examples/notebooks/Brax_Experiments_with_PGPE.ipynb (2 additions, 2 deletions)
@@ -176,8 +176,8 @@
"outputs": [],
"source": [
"problem = VecGymNE(\n",
" \"brax::humanoid\", # solve the brax task named \"humanoid\"\n",
" policy,\n",
" env=\"brax::humanoid\", # solve the brax task named \"humanoid\"\n",
" network=policy,\n",
" #\n",
" # Collect observation stats, and use those stats to normalize incoming observations\n",
" observation_normalization=True,\n",
examples/notebooks/Gym_Experiments_with_PGPE_and_CoSyNE.ipynb (11 additions, 6 deletions)
@@ -35,16 +35,21 @@
"source": [
"import torch\n",
"from torch import nn\n",
"from evotorch.decorators import pass_info\n",
"\n",
"\n",
"# The decorator `@pass_info` used below tells the problem class `GymNE`\n",
"# to pass information regarding the gym environment via keyword arguments\n",
"# such as `obs_length` and `act_length`.\n",
"@pass_info\n",
"class LinearPolicy(nn.Module):\n",
" \n",
" def __init__(\n",
" self, \n",
" obs_length: int, # Number of observations from the environment\n",
" act_length: int, # Number of actions of the environment\n",
" bias: bool = True, # Whether the policy should use biases\n",
" **kwargs # Anything else that is passed\n",
" ):\n",
" ):\n",
" super().__init__() # Always call super init for nn Modules\n",
" self.linear = nn.Linear(obs_length, act_length, bias = bias)\n",
" \n",
@@ -71,7 +76,7 @@
"from evotorch.neuroevolution import GymNE\n",
"\n",
"problem = GymNE(\n",
" env_name=\"LunarLanderContinuous-v2\", # Name of the environment\n",
" env=\"LunarLanderContinuous-v2\", # Name of the environment\n",
" network=LinearPolicy, # Linear policy that we defined earlier\n",
" network_args = {'bias': False}, # Linear policy should not use biases\n",
" num_actors= 4, # Use 4 available CPUs. Note that you can modify this value, or use 'max' to exploit all available CPUs\n",
@@ -189,7 +194,7 @@
"outputs": [],
"source": [
"problem = GymNE(\n",
" env_name=\"LunarLanderContinuous-v2\",\n",
" env=\"LunarLanderContinuous-v2\",\n",
" network=LinearPolicy,\n",
" network_args = {'bias': False},\n",
" num_actors= 4, \n",
@@ -250,7 +255,7 @@
"id": "3dcb5243",
"metadata": {},
"source": [
"And once again we can visualize the learned policy. As `CoSyNE` is population based, it does not maintain a 'best estimate' of a good policy. Instead, we simply take the best performing solution from the current population. "
"And once again we can visualize the learned policy. As `Cosyne` is population based, it does not maintain a 'best estimate' of a good policy. Instead, we simply take the best performing solution from the current population. "
]
},
{
@@ -296,7 +301,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.7.13"
}
},
"nbformat": 4,
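
On the sentence above about `Cosyne` taking the best performing solution from the current population: a minimal sketch of that step is shown below. It reuses the notebook's `problem` (a `GymNE` instance) and its `Cosyne` searcher, and it assumes the searcher's status dictionary exposes the population best under `"pop_best"` and that `GymNE` provides `to_policy()`; both names should be verified against the installed EvoTorch version.

```python
# Sketch: after running a population-based searcher such as Cosyne, take the
# best solution of the current population and turn it into a policy network.
# Assumptions: `searcher` and `problem` are the notebook's Cosyne searcher and
# GymNE problem already in scope, the status dictionary has a "pop_best" entry,
# and GymNE provides to_policy(); check these against your EvoTorch version.
best_solution = searcher.status["pop_best"]
policy = problem.to_policy(best_solution)  # may need best_solution.values instead
print(policy)
```
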
examples/scripts/rl_clipup.py (2 additions, 2 deletions)
@@ -225,7 +225,7 @@ def main(_config: dict):

# Instantiate the problem class
problem = GymNE(
-env_name=env_name,
+env=env_name,
network=_config["policy"],
observation_normalization=_config["observation_normalization"],
decrease_rewards_by=none_if_nan(_config["decrease_rewards_by"]),
@@ -268,7 +268,7 @@ def main(_config: dict):
# Create a test problem instance -- note the difference in configuration

test_problem = GymNE(
-env_name=actual_env_name, # Using the actual environment name, rather than a modified version
+env=actual_env_name, # Using the actual environment name, rather than a modified version
network=_config["policy"],
observation_normalization=_config["observation_normalization"],
decrease_rewards_by=0.0, # Not changing the rewards
examples/scripts/rl_gym.py (11 additions, 2 deletions)
@@ -13,12 +13,12 @@
# limitations under the License.

from evotorch.algorithms import PGPE
-from evotorch.logging import StdOutLogger
+from evotorch.logging import PicklingLogger, StdOutLogger
from evotorch.neuroevolution import GymNE

# Specialized Problem class for Gym environments
problem = GymNE(
env_name="Humanoid-v4",
env="Humanoid-v4",
# Linear policy defined using special string syntax supported by EvoTorch
network="Linear(obs_length, act_length)",
observation_normalization=True,
@@ -40,7 +40,16 @@
num_interactions=150000,
popsize_max=3200,
)

# Instantiate a logger that will print the progress to the standard output
logger = StdOutLogger(searcher)

+# Optional:
+# Instantiate a logger that will, at every 10 generations, pickle and save the results (where the results will include
+# the center of the search distribution since we are using PGPE which is a distribution-based search algorithm).
+pickler = PicklingLogger(searcher, interval=10)

# Run the search algorithm
searcher.run(500)

# Create a policy to test using the final center of the optimized distribution and visualize its behavior
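
The remainder of `rl_gym.py` is collapsed in this view. For orientation only, here is a sketch of what the step described by the comment above can look like; it is not the script's collapsed code, and it assumes that PGPE's status dictionary exposes the distribution center under `"center"` and that `GymNE` provides `to_policy()`.

```python
# Sketch (not the collapsed portion of rl_gym.py): build a policy module from
# the final center of PGPE's search distribution.
# Assumptions: `searcher` and `problem` are the PGPE searcher and GymNE problem
# defined earlier in the script, the status dictionary has a "center" entry,
# and GymNE provides to_policy(); verify against your EvoTorch version.
center = searcher.status["center"]
policy = problem.to_policy(center)
print(policy)  # a torch.nn.Module ready to be rolled out in the environment
```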
