Merge pull request #76 from HumanCompatibleAI/tomato_featurization
Tomato featurization
nathan-miller23 committed May 29, 2021
2 parents 3b7a161 + 26f82c6 commit a60e31a
Showing 20 changed files with 1,148 additions and 254 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/pythontests.yml
@@ -33,10 +33,9 @@ jobs:
run: pip install -e .
- name: Run tests and generate coverage report
run: |
python -m unittest discover -s testing/ -p "*_test.py"
coverage run -m unittest discover -s testing/ -p "*_test.py"
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
flags: no-planners
name: codecov-report
fail_ci_if_error: false
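
The change above swaps the plain unittest invocation for coverage run, so the same test discovery now also produces coverage data for the codecov/codecov-action@v1 upload step. A rough local equivalent using coverage's Python API rather than the CLI (a sketch, not part of this commit):

# Sketch of a local equivalent of the CI step above (not part of this commit):
# run unittest discovery under coverage measurement, then print a summary.
import unittest
import coverage

cov = coverage.Coverage()  # same measurement engine that `coverage run` uses
cov.start()
suite = unittest.defaultTestLoader.discover(start_dir="testing/", pattern="*_test.py")
unittest.TextTestRunner(verbosity=2).run(suite)
cov.stop()
cov.save()    # writes the .coverage data file
cov.report()  # prints a terminal summary of the collected data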
5 changes: 5 additions & 0 deletions codecov.yml
@@ -1,4 +1,9 @@
codecov:
status:
project:
default:
target: auto
threshold: 1%
require_ci_to_pass: yes
max_report_age: off

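With this configuration, Codecov's project status compares each commit's coverage to its base commit (target: auto) and tolerates a drop of up to 1% (threshold: 1%) before failing the check, while fail_ci_if_error: false in the workflow above keeps upload problems from breaking CI.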
16 changes: 9 additions & 7 deletions src/overcooked_ai_py/agents/benchmarking.py
@@ -8,6 +8,7 @@
from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, Action, OvercookedState
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
from overcooked_ai_py.mdp.layout_generator import LayoutGenerator
from overcooked_ai_py.mdp.overcooked_trajectory import DEFAULT_TRAJ_KEYS


class AgentEvaluator(object):
@@ -151,20 +152,20 @@ def get_agent_pair_trajs(self, a0, a1=None, num_games=100, game_length=None, sta
return trajs_0, trajs_1

@staticmethod
def check_trajectories(trajectories, from_json=False):
def check_trajectories(trajectories, from_json=False, **kwargs):
"""
Checks that trajectories are in standard format and are consistent with the dynamics of the mdp.
If the trajectories were saved as json, do not check that they have standard traj keys.
"""
if not from_json:
AgentEvaluator._check_standard_traj_keys(set(trajectories.keys()))
AgentEvaluator._check_right_types(trajectories)
AgentEvaluator._check_trajectories_dynamics(trajectories)
AgentEvaluator._check_trajectories_dynamics(trajectories, **kwargs)
# TODO: Check shapes?

@staticmethod
def _check_standard_traj_keys(traj_keys_set):
default_traj_keys = OvercookedEnv.DEFAULT_TRAJ_KEYS
default_traj_keys = DEFAULT_TRAJ_KEYS
assert traj_keys_set == set(default_traj_keys), "Keys of traj dict did not match standard form.\nMissing keys: {}\nAdditional keys: {}".format(
[k for k in default_traj_keys if k not in traj_keys_set], [k for k in traj_keys_set if k not in default_traj_keys]
)
@@ -181,10 +182,11 @@ def _check_right_types(trajectories):
# TODO: check that are all lists

@staticmethod
def _check_trajectories_dynamics(trajectories):
def _check_trajectories_dynamics(trajectories, verbose=True):
if any(env_params["_variable_mdp"] for env_params in trajectories["env_params"]):
print("Skipping trajectory consistency checking because MDP was recognized as variable. "
"Trajectory consistency checking is not yet supported for variable MDPs.")
if verbose:
print("Skipping trajectory consistency checking because MDP was recognized as variable. "
"Trajectory consistency checking is not yet supported for variable MDPs.")
return

_, envs = AgentEvaluator.get_mdps_and_envs_from_trajectories(trajectories)
@@ -241,7 +243,7 @@ def load_trajectories(filename):
@staticmethod
def save_traj_as_json(trajectory, filename):
"""Saves the `idx`th trajectory as a list of state action pairs"""
assert set(OvercookedEnv.DEFAULT_TRAJ_KEYS) == set(trajectory.keys()), "{} vs\n{}".format(OvercookedEnv.DEFAULT_TRAJ_KEYS, trajectory.keys())
assert set(DEFAULT_TRAJ_KEYS) == set(trajectory.keys()), "{} vs\n{}".format(DEFAULT_TRAJ_KEYS, trajectory.keys())
AgentEvaluator.check_trajectories(trajectory)
trajectory = AgentEvaluator.make_trajectories_json_serializable(trajectory)
save_as_json(trajectory, filename)
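
Taken together, the benchmarking.py changes move DEFAULT_TRAJ_KEYS out of OvercookedEnv into overcooked_ai_py.mdp.overcooked_trajectory and let check_trajectories forward keyword arguments such as verbose down to _check_trajectories_dynamics. A minimal usage sketch of the updated API (the wrapper function below is hypothetical, not part of this commit):

# Hypothetical usage sketch of the updated API (not part of this commit).
from overcooked_ai_py.agents.benchmarking import AgentEvaluator
from overcooked_ai_py.mdp.overcooked_trajectory import DEFAULT_TRAJ_KEYS  # new home of the keys

def validate_trajectories(trajectories, quiet=True):
    """Assumed helper: check a standard-format trajectory dict with the new signature."""
    assert set(trajectories.keys()) == set(DEFAULT_TRAJ_KEYS), "non-standard trajectory keys"
    # verbose is forwarded through **kwargs to _check_trajectories_dynamics, so
    # quiet=True suppresses the "variable MDP" message added in this diff.
    AgentEvaluator.check_trajectories(trajectories, from_json=False, verbose=not quiet)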
269 changes: 269 additions & 0 deletions src/overcooked_ai_py/data/testing/test_display/expected.txt
@@ -0,0 +1,269 @@
X X P X X

O ↑1 O

X ↑0 X

X D X S X


Timestep: 1
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O ↑1 O

X ↑0 X

X D X S X



Timestep: 2
Joint action taken: ('←', '→') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 3
Joint action taken: ('←', '→') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 4
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 5
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 6
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 7
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 8
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 9
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 10
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 11
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 12
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 13
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 14
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 15
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 16
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 17
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 18
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 19
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X



Timestep: 20
Joint action taken: ('stay', 'stay') Reward: 0 + shaping_factor * [0, 0]
Action probs by index: [None, None]
X X P X X

O →1 O

X ←0 X

X D X S X
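
The expected.txt fixture above records the environment's printed rollout for the new display test: in this grid encoding X is a counter, O an onion dispenser, P a pot, D a dish dispenser, S a serving location, and the arrow-plus-index symbols (↑0, →1, and so on) are the two players with their facing directions. A regression test against such a fixture might look roughly like the sketch below; produce_display_output is a hypothetical stand-in for whatever renders the rollout in the commit's actual test code.

# A minimal sketch, not the commit's actual test: compare freshly rendered display
# output against the expected.txt fixture shown above.
import unittest
from pathlib import Path

EXPECTED_PATH = Path("src/overcooked_ai_py/data/testing/test_display/expected.txt")

def produce_display_output() -> str:
    """Hypothetical placeholder; the real test would render an OvercookedEnv rollout."""
    return EXPECTED_PATH.read_text()  # stand-in so the sketch runs end to end

class DisplayFixtureTest(unittest.TestCase):
    def test_output_matches_fixture(self):
        self.assertEqual(produce_display_output(), EXPECTED_PATH.read_text())

if __name__ == "__main__":
    unittest.main()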


