Fix PPO logging of clip_fractions (#150)

* bugfix for PPO logging of clip_fractions
* Update changelog.rst

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
DLR-RM · Sep 1, 2020 · 4fd408b · 4fd408b
1 parent f8c25d3
commit 4fd408b
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 1 deletion.
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -18,6 +18,7 @@ New Features:
 Bug Fixes:
 ^^^^^^^^^^
 - Fixed a bug where the environment was reset twice when using ``evaluate_policy``
+- Fix logging of ``clip_fraction`` in PPO (@diditforlulz273)
 
 Deprecations:
 ^^^^^^^^^^^^^
@@ -398,3 +399,4 @@ And all the contributors:
 @MarvineGothic @jdossgollin @SyllogismRXS @rusu24edward @jbulow @Antymon @seheevic @justinkterry @edbeeching
 @flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta @rolandgvc @tkelestemur @mloo3
 @tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
+@diditforlulz273
diff --git a/stable_baselines3/ppo/ppo.py b/stable_baselines3/ppo/ppo.py
@@ -228,7 +228,7 @@ def train(self) -> None:
         logger.record("train/policy_gradient_loss", np.mean(pg_losses))
         logger.record("train/value_loss", np.mean(value_losses))
         logger.record("train/approx_kl", np.mean(approx_kl_divs))
-        logger.record("train/clip_fraction", np.mean(clip_fraction))
+        logger.record("train/clip_fraction", np.mean(clip_fractions))
         logger.record("train/loss", loss.item())
         logger.record("train/explained_variance", explained_var)
         if hasattr(self.policy, "log_std"):