facebookresearch · erikwijmans · Oct 9, 2019 · Sep 28, 2019 · Sep 28, 2019 · Sep 28, 2019
diff --git a/configs/test/habitat_all_sensors_test.yaml b/configs/test/habitat_all_sensors_test.yaml
@@ -1,5 +1,7 @@
 ENVIRONMENT:
   MAX_EPISODE_STEPS: 10
+  ITERATOR_OPTIONS:
+    SHUFFLE: False
 SIMULATOR:
   AGENT_0:
     SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR']

diff --git a/configs/test/habitat_mp3d_eqa_test.yaml b/configs/test/habitat_mp3d_eqa_test.yaml
@@ -1,5 +1,8 @@
 TASK:
   TYPE: EQA-v0
+ENVIRONMENT:
+  ITERATOR_OPTIONS:
+    SHUFFLE: False
 SIMULATOR:
   SCENE: data/scene_datasets/mp3d/17DRP5sb8fy/17DRP5sb8fy.glb
   FORWARD_STEP_SIZE: 0.1

diff --git a/habitat/config/default.py b/habitat/config/default.py
@@ -24,10 +24,11 @@
 _C.ENVIRONMENT.MAX_EPISODE_SECONDS = 10000000
 _C.ENVIRONMENT.ITERATOR_OPTIONS = CN()
 _C.ENVIRONMENT.ITERATOR_OPTIONS.CYCLE = True
-_C.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False
+_C.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = True
 _C.ENVIRONMENT.ITERATOR_OPTIONS.GROUP_BY_SCENE = True
 _C.ENVIRONMENT.ITERATOR_OPTIONS.NUM_EPISODE_SAMPLE = -1
-_C.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT = -1
+_C.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_EPISODES = -1
+_C.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_STEPS = int(1e4)
 # -----------------------------------------------------------------------------
 # TASK
 # -----------------------------------------------------------------------------

diff --git a/habitat/core/dataset.py b/habitat/core/dataset.py
@@ -278,8 +278,10 @@ def __init__(
         cycle: bool = True,
         shuffle: bool = False,
         group_by_scene: bool = True,
-        max_scene_repeat: int = -1,
+        max_scene_repeat_episodes: int = -1,
+        max_scene_repeat_steps: int = -1,
         num_episode_sample: int = -1,
+        step_repetition_range: float = 0.2,
     ):
         r"""..
 
@@ -290,32 +292,48 @@ def __init__(
             effect if cycle is set to :py:`False`. Will shuffle grouped scenes
             if :p:`group_by_scene` is :py:`True`.
         :param group_by_scene: if :py:`True`, group episodes from same scene.
-        :param max_scene_repeat: threshold of how many episodes from the same
+        :param max_scene_repeat_episodes: threshold of how many episodes from the same
             scene can be loaded consecutively. :py:`-1` for no limit
+        :param max_scene_repeat_steps: threshold of how many steps from the same
+            scene can be taken consecutively. :py:`-1` for no limit
         :param num_episode_sample: number of episodes to be sampled. :py:`-1`
             for no sampling.
+        :param step_repetition_range: The maximum number of steps within each scene is
+            uniformly drawn from
+            [1 - step_repetition_range, 1 + step_repetition_range] * max_scene_repeat_steps
+            on each scene switch.  This stops all workers from swapping scenes at
+            the same time
         """
+
         # sample episodes
         if num_episode_sample >= 0:
             episodes = np.random.choice(
                 episodes, num_episode_sample, replace=False
             )
+
         self.episodes = episodes
         self.cycle = cycle
         self.group_by_scene = group_by_scene
-        if group_by_scene:
-            num_scene_groups = len(
-                list(groupby(episodes, key=lambda x: x.scene_id))
-            )
-            num_unique_scenes = len(set([e.scene_id for e in episodes]))
-            if num_scene_groups >= num_unique_scenes:
-                self.episodes = sorted(self.episodes, key=lambda x: x.scene_id)
-        self.max_scene_repetition = max_scene_repeat
         self.shuffle = shuffle
-        self._rep_count = 0
+
+        if shuffle:
+            random.shuffle(self.episodes)
+
+        if group_by_scene:
+            self.episodes = sorted(self.episodes, key=lambda x: x.scene_id)
+
+        self.max_scene_repetition_episodes = max_scene_repeat_episodes
+        self.max_scene_repetition_steps = max_scene_repeat_steps
+
+        self._rep_count = -1  # 0 corresponds to first episode already returned
+        self._step_count = 0
         self._prev_scene_id = None
+
         self._iterator = iter(self.episodes)
 
+        self.step_repetition_range = step_repetition_range
+        self._set_shuffle_intervals()
+
     def __iter__(self):
         return self
 
@@ -324,40 +342,98 @@ def __next__(self):
 
         :return: next episode.
         """
+        self._forced_scene_switch_if()
 
         next_episode = next(self._iterator, None)
         if next_episode is None:
             if not self.cycle:
                 raise StopIteration
+
             self._iterator = iter(self.episodes)
+
             if self.shuffle:
-                self._shuffle_iterator()
+                self._shuffle()
+
             next_episode = next(self._iterator)
 
-        if self._prev_scene_id == next_episode.scene_id:
-            self._rep_count += 1
         if (
-            self.max_scene_repetition > 0
-            and self._rep_count >= self.max_scene_repetition - 1
+            self._prev_scene_id != next_episode.scene_id
+            and self._prev_scene_id is not None
         ):
-            self._shuffle_iterator()
             self._rep_count = 0
+            self._step_count = 0
 
         self._prev_scene_id = next_episode.scene_id
         return next_episode
 
-    def _shuffle_iterator(self) -> None:
+    def _forced_scene_switch(self) -> None:
+        r"""Internal method to switch the scene. Moves the leading scene group
+        of the remaining episodes to the end so the next scene group comes first.
+        """
+        grouped_episodes = [
+            list(group)
+            for _, group in groupby(self._iterator, key=lambda x: x.scene_id)
+        ]
+
+        if len(grouped_episodes) > 1:
+            # Ensure we swap by moving the current group to the end
+            grouped_episodes = grouped_episodes[1:] + grouped_episodes[0:1]
+
+        self._iterator = iter([ep for g in grouped_episodes for ep in g])
+
+    def _shuffle(self) -> None:
         r"""Internal method that shuffles the remaining episodes.
             If self.group_by_scene is true, then shuffle groups of scenes.
         """
+        episodes = list(self._iterator)
+
+        random.shuffle(episodes)
+
         if self.group_by_scene:
-            grouped_episodes = [
-                list(g)
-                for k, g in groupby(self._iterator, key=lambda x: x.scene_id)
-            ]
-            random.shuffle(grouped_episodes)
-            self._iterator = iter(sum(grouped_episodes, []))
+            episodes = sorted(episodes, key=lambda x: x.scene_id)
+
+        self._iterator = iter(episodes)
+
+    def step_taken(self):
+        self._step_count += 1
+
+    @staticmethod
+    def _randomize_value(value, value_range):
+        lower = int(value * (1 - value_range))
+        upper = int(value * (1 + value_range))
+        return random.randint(lower, upper)
+
+    def _set_shuffle_intervals(self):
+        if self.max_scene_repetition_episodes > 0:
+            self._max_rep_episode = self.max_scene_repetition_episodes
+        else:
+            self._max_rep_episode = None
+
+        if self.max_scene_repetition_steps > 0:
+            self._max_rep_step = self._randomize_value(
+                self.max_scene_repetition_steps, self.step_repetition_range
+            )
         else:
-            episodes = list(self._iterator)
-            random.shuffle(episodes)
-            self._iterator = iter(episodes)
+            self._max_rep_step = None
+
+    def _forced_scene_switch_if(self):
+        do_switch = False
+        self._rep_count += 1
+
+        # Switch scenes once _max_rep_episode episodes in a row came from the same scene
+        if (
+            self._max_rep_episode is not None
+            and self._rep_count >= self._max_rep_episode
+        ):
+            do_switch = True
+
+        # Switch scenes once _max_rep_step steps in a row were taken in the same scene
+        if (
+            self._max_rep_step is not None
+            and self._step_count >= self._max_rep_step
+        ):
+            do_switch = True
+
+        if do_switch:
+            self._forced_scene_switch()
+            self._set_shuffle_intervals()
diff --git a/habitat/core/env.py b/habitat/core/env.py
@@ -12,7 +12,7 @@
 from gym.spaces.dict_space import Dict as SpaceDict
 
 from habitat.config import Config
-from habitat.core.dataset import Dataset, Episode
+from habitat.core.dataset import Dataset, Episode, EpisodeIterator
 from habitat.core.embodied_task import EmbodiedTask, Metrics
 from habitat.core.simulator import Observations, Simulator
 from habitat.datasets import make_dataset
@@ -213,6 +213,11 @@ def _update_step_stats(self) -> None:
         if self._past_limit():
             self._episode_over = True
 
+        if self.episode_iterator is not None and isinstance(
+            self.episode_iterator, EpisodeIterator
+        ):
+            self.episode_iterator.step_taken()
+
     def step(
         self, action: Union[int, str, Dict[str, Any]], **kwargs
     ) -> Observations:

diff --git a/test/test_dataset.py b/test/test_dataset.py
@@ -240,24 +240,113 @@ def test_iterator_shuffle():
     assert len(first_round_scene_groups) == len(set(first_round_scene_groups))
 
 
-def test_iterator_scene_switching():
+def test_iterator_scene_switching_episodes():
     total_ep = 1000
     max_repeat = 25
     dataset = _construct_dataset(total_ep)
 
-    episode_iter = dataset.get_episode_iterator(max_scene_repeat=max_repeat)
+    episode_iter = dataset.get_episode_iterator(
+        max_scene_repeat_episodes=max_repeat, shuffle=False, cycle=True
+    )
     episodes = sorted(dataset.episodes, key=lambda x: x.scene_id)
 
-    # episodes before max_repeat reached should be identical
     for i in range(max_repeat):
         episode = next(episode_iter)
-        assert episode.episode_id == episodes.pop(0).episode_id
+        assert (
+            episode.episode_id == episodes.pop(0).episode_id
+        ), "episodes before max_repeat reached should be identical"
+
+    episode = next(episode_iter)
+    assert (
+        episode.scene_id != episodes.pop(0).scene_id
+    ), "After max_repeat episodes a scene switch doesn't happen."
+
+    remaining_episodes = list(islice(episode_iter, total_ep - max_repeat - 1))
+    assert len(remaining_episodes) == len(
+        episodes
+    ), "Remaining episodes should be identical."
+
+    assert len(set(e.scene_id for e in remaining_episodes)) == len(
+        set(map(lambda ep: ep.scene_id, episodes))
+    ), "Next episodes should still include all scenes."
+
+    cycled_episodes = list(islice(episode_iter, 4 * total_ep))
+    assert (
+        len(set(map(lambda x: x.episode_id, cycled_episodes))) == total_ep
+    ), "Some episodes leaked after cycling."
+
+    grouped_episodes = [
+        list(g) for k, g in groupby(cycled_episodes, key=lambda x: x.scene_id)
+    ]
+    assert (
+        len(sum(grouped_episodes, [])) == 4 * total_ep
+    ), "Cycled episode iterator returned unexpected number of episodes."
+    assert (
+        len(grouped_episodes) == 4 * total_ep / max_repeat
+    ), "The number of scene switches is unexpected."
+
+    assert all(
+        [len(group) == max_repeat for group in grouped_episodes]
+    ), "Not all scene switches are equal to required number."
+
+
+def test_iterator_scene_switching_episodes_without_shuffle_cycle():
+    total_ep = 1000
+    max_repeat = 25
+    dataset = _construct_dataset(total_ep)
+    episode_iter = dataset.get_episode_iterator(
+        max_scene_repeat_episodes=max_repeat, shuffle=False, cycle=False
+    )
+
+    grouped_episodes = [
+        list(g) for k, g in groupby(episode_iter, key=lambda x: x.scene_id)
+    ]
+    assert (
+        len(sum(grouped_episodes, [])) == total_ep
+    ), "The episode iterator returned unexpected number of episodes."
+    assert (
+        len(grouped_episodes) == total_ep / max_repeat
+    ), "The number of scene switches is unexpected."
+
+    assert all(
+        len(group) == max_repeat for group in grouped_episodes
+    ), "Not all scene switches are equal to required number."
+
+
+def test_iterator_scene_switching_steps():
+    total_ep = 1000
+    max_repeat_steps = 250
+    dataset = _construct_dataset(total_ep)
 
-    remaining_episodes = list(islice(episode_iter, total_ep - max_repeat))
-    # remaining episodes should be same but in different order
-    assert len(remaining_episodes) == len(episodes)
-    assert remaining_episodes != episodes
-    assert sorted(remaining_episodes) == sorted(episodes)
+    episode_iter = dataset.get_episode_iterator(
+        max_scene_repeat_steps=max_repeat_steps,
+        shuffle=False,
+        step_repetition_range=0.0,
+    )
+    episodes = sorted(dataset.episodes, key=lambda x: x.scene_id)
+
+    episode = next(episode_iter)
+    assert (
+        episode.episode_id == episodes.pop(0).episode_id
+    ), "First episode should match the first episode of the sorted dataset."
 
-    # next episodes should still be grouped by scene (before next switching)
-    assert len(set([e.scene_id for e in remaining_episodes[:max_repeat]])) == 1
+    # take max_repeat_steps steps so the next episode must force a scene switch
+    for _ in range(max_repeat_steps):
+        episode_iter.step_taken()
+
+    episode = next(episode_iter)
+    assert (
+        episode.episode_id != episodes.pop(0).episode_id
+    ), "After max_repeat_steps steps a scene switch doesn't happen."
+
+    remaining_episodes = list(islice(episode_iter, total_ep - 2))
+    assert len(remaining_episodes) == len(
+        episodes
+    ), "Remaining episodes numbers aren't equal."
+
+    assert len(set(e.scene_id for e in remaining_episodes)) == len(
+        list(groupby(remaining_episodes, lambda ep: ep.scene_id))
+    ), (
+        "Next episodes should still be grouped by scene (before next "
+        "switching)."
+    )
diff --git a/test/test_habitat_task.py b/test/test_habitat_task.py
@@ -35,18 +35,12 @@ def test_task_actions():
         }
     )
     agent_state = env.sim.get_agent_state()
-    assert (
-        np.allclose(
-            np.array(TELEPORT_POSITION, dtype=np.float32), agent_state.position
-        )
-        is True
+    assert np.allclose(
+        np.array(TELEPORT_POSITION, dtype=np.float32), agent_state.position
     ), "mismatch in position after teleport"
-    assert (
-        np.allclose(
-            np.array(TELEPORT_ROTATION, dtype=np.float32),
-            np.array([*agent_state.rotation.imag, agent_state.rotation.real]),
-        )
-        is True
+    assert np.allclose(
+        np.array(TELEPORT_ROTATION, dtype=np.float32),
+        np.array([*agent_state.rotation.imag, agent_state.rotation.real]),
     ), "mismatch in rotation after teleport"
     env.step("TURN_RIGHT")
     env.close()