Farama-Foundation · jjshoots · Sep 17, 2022 · Sep 2, 2022 · Sep 5, 2022 · Sep 5, 2022
diff --git a/supersuit/lambda_wrappers/observation_lambda.py b/supersuit/lambda_wrappers/observation_lambda.py
@@ -112,9 +112,9 @@ def _modify_observation(self, observation):
         return self.change_observation_fn(observation, self.env.observation_space)
 
     def step(self, action):
-        observation, rew, done, info = self.env.step(action)
+        observation, rew, termination, truncation, info = self.env.step(action)
         observation = self._modify_observation(observation)
-        return observation, rew, done, info
+        return observation, rew, termination, truncation, info
 
     def reset(self, seed=None, return_info=False, options=None):
         if not return_info:

diff --git a/supersuit/lambda_wrappers/reward_lambda.py b/supersuit/lambda_wrappers/reward_lambda.py
@@ -50,8 +50,8 @@ def __init__(self, env, change_reward_fn):
         super().__init__(env)
 
     def step(self, action):
-        obs, rew, done, info = super().step(action)
-        return obs, self._change_reward_fn(rew), done, info
+        obs, rew, termination, truncation, info = super().step(action)
+        return obs, self._change_reward_fn(rew), termination, truncation, info
 
 
 reward_lambda_v0 = WrapperChooser(

diff --git a/supersuit/vector/multiproc_vec.py b/supersuit/vector/multiproc_vec.py
@@ -201,7 +201,7 @@ def step_wait(self):
         compressed_infos = self._receive_info()
         infos = decompress_info(self.num_envs, self.idx_starts, compressed_infos)
         rewards = self.shared_rews.np_arr
-        dones = self.shared_dones.np_arr
+        dones = self.shared_dones.np_arr.astype(bool)
         return (
             numpy_deepcopy(self.observations_buffers),
             rewards.copy(),

diff --git a/test/dummy_aec_env.py b/test/dummy_aec_env.py
@@ -29,24 +29,28 @@ def observe(self, agent):
         return self._observations[agent]
 
     def step(self, action, observe=True):
-        if self.dones[self.agent_selection]:
-            return self._was_done_step(action)
+        if (
+            self.terminations[self.agent_selection]
+            or self.truncations[self.agent_selection]
+        ):
+            return self._was_dead_step(action)
         self._cumulative_rewards[self.agent_selection] = 0
         self.agent_selection = self._agent_selector.next()
         self.steps += 1
         if self.steps >= 5 * len(self.agents):
-            self.dones = {a: True for a in self.agents}
+            self.truncations = {a: True for a in self.agents}
 
         self._accumulate_rewards()
-        self._dones_step_first()
+        self._deads_step_first()
 
     def reset(self, seed=None, return_info=False, options=None):
         self.agents = self.possible_agents[:]
         self._agent_selector = agent_selector(self.agents)
         self.agent_selection = self._agent_selector.reset()
         self.rewards = {a: 1 for a in self.agents}
         self._cumulative_rewards = {a: 0 for a in self.agents}
-        self.dones = {a: False for a in self.agents}
+        self.terminations = {a: False for a in self.agents}
+        self.truncations = {a: False for a in self.agents}
         self.infos = {a: {} for a in self.agents}
         self.steps = 0
 

diff --git a/test/gym_mock_test.py b/test/gym_mock_test.py
@@ -135,5 +135,5 @@ def one_hot(x, n):
 def test_rew_lambda():
     env = supersuit.reward_lambda_v0(new_dummy(), lambda x: x / 10)
     env.reset()
-    obs, rew, done, info = env.step(0)
+    obs, rew, termination, truncation, info = env.step(0)
     assert rew == 1.0 / 10
diff --git a/test/parallel_env_test.py b/test/parallel_env_test.py
@@ -18,7 +18,8 @@ def __init__(self, observations, observation_spaces, action_spaces):
         self._action_spaces = action_spaces
 
         self.rewards = {a: 1 for a in self.agents}
-        self.dones = {a: False for a in self.agents}
+        self.terminations = {a: False for a in self.agents}
+        self.truncations = {a: False for a in self.agents}
         self.infos = {a: {} for a in self.agents}
 
     def observation_space(self, agent):
@@ -30,7 +31,13 @@ def action_space(self, agent):
     def step(self, actions):
         for agent, action in actions.items():
             assert action in self.action_space(agent)
-        return self._observations, self.rewards, self.dones, self.infos
+        return (
+            self._observations,
+            self.rewards,
+            self.terminations,
+            self.truncations,
+            self.infos,
+        )
 
     def reset(self, seed=None, return_info=False, options=None):
         if not return_info:
@@ -60,4 +67,4 @@ def test_basic():
     orig_obs = env.reset()
     for i in range(10):
         action = {agent: env.action_space(agent).sample() for agent in env.agents}
-        obs, rew, done, info = env.step(action)
+        obs, rew, termination, truncation, info = env.step(action)
diff --git a/test/vec_env_test.py b/test/vec_env_test.py
@@ -8,7 +8,9 @@ def test_vec_env_args():
     num_envs = 8
     vec_env = gym_vec_env_v0(env, num_envs)
     vec_env.reset()
-    obs, rew, dones, infos = vec_env.step([0] + [1] * (vec_env.num_envs - 1))
+    obs, rew, terminations, truncations, infos = vec_env.step(
+        [0] + [1] * (vec_env.num_envs - 1)
+    )
     assert not np.any(np.equal(obs[0], obs[1]))