instadeepai · clement-bonnet · Jan 16, 2024 · Jan 16, 2024 · Jan 16, 2024
diff --git a/docs/environments/cleaner.md b/docs/environments/cleaner.md
@@ -20,7 +20,7 @@ always start in the top left corner of the maze.
 ## Observation
 The **observation** seen by the agent is a `NamedTuple` containing the following:
 
-- `grid`: jax array (int) of shape `(num_rows, num_cols)`, array representing the grid, each tile is
+- `grid`: jax array (int8) of shape `(num_rows, num_cols)`, array representing the grid, each tile is
     either dirty (0), clean (1), or a wall (2).
 
 - `agents_locations`: jax array (int) of shape `(num_agents, 2)`, array specifying the x and y

diff --git a/jumanji/environments/routing/cleaner/env.py b/jumanji/environments/routing/cleaner/env.py
@@ -35,7 +35,7 @@ class Cleaner(Environment[State]):
     a maze.
 
     - observation: `Observation`
-        - grid: jax array (int32) of shape (num_rows, num_cols)
+        - grid: jax array (int8) of shape (num_rows, num_cols)
             contains the state of the board: 0 for dirty tile, 1 for clean tile, 2 for wall.
         - agents_locations: jax array (int32) of shape (num_agents, 2)
             contains the location of each agent on the board.
@@ -57,7 +57,7 @@ class Cleaner(Environment[State]):
         - An invalid action is selected for any of the agents.
 
     - state: `State`
-        - grid: jax array (int32) of shape (num_rows, num_cols)
+        - grid: jax array (int8) of shape (num_rows, num_cols)
             contains the current state of the board: 0 for dirty tile, 1 for clean tile, 2 for wall.
         - agents_locations: jax array (int32) of shape (num_agents, 2)
             contains the location of each agent on the board.
@@ -127,15 +127,15 @@ def observation_spec(self) -> specs.Spec[Observation]:
 
         Returns:
             Spec for the `Observation`, consisting of the fields:
-                - grid: BoundedArray (int32) of shape (num_rows, num_cols). Values
+                - grid: BoundedArray (int8) of shape (num_rows, num_cols). Values
                     are between 0 and 2 (inclusive).
                 - agents_locations: BoundedArray (int32) of shape (num_agents, 2).
                     Maximum value for the first column is num_rows, and maximum value
                     for the second is num_cols.
                 - action_mask: BoundedArray (bool) of shape (num_agents, 4).
                 - step_count: BoundedArray (int32) of shape ().
         """
-        grid = specs.BoundedArray(self.grid_shape, jnp.int32, 0, 2, "grid")
+        grid = specs.BoundedArray(self.grid_shape, jnp.int8, 0, 2, "grid")
         agents_locations = specs.BoundedArray(
             (self.num_agents, 2), jnp.int32, [0, 0], self.grid_shape, "agents_locations"
         )

diff --git a/jumanji/environments/routing/cleaner/env_test.py b/jumanji/environments/routing/cleaner/env_test.py
@@ -32,7 +32,8 @@
         [DIRTY, DIRTY, DIRTY, DIRTY, WALL],
         [DIRTY, WALL, WALL, DIRTY, WALL],
         [DIRTY, WALL, DIRTY, DIRTY, DIRTY],
-    ]
+    ],
+    dtype=jnp.int8,
 )