Merge pull request #70 from opentensor/hotfix/disable-blacklist
Remove missing rewards
p-ferreira committed Nov 13, 2023
2 parents 1e3d367 + 2c52f47 commit 21835e0
Showing 6 changed files with 93 additions and 7 deletions.
2 changes: 1 addition & 1 deletion prompting/validators/__init__.py
@@ -27,7 +27,7 @@
from . import event
from . import dataset

__version__ = "2.1.2"
__version__ = "2.1.3"
version_split = __version__.split(".")
__spec_version__ = (
(1000 * int(version_split[0]))
5 changes: 1 addition & 4 deletions prompting/validators/forward.py
@@ -230,10 +230,7 @@ async def forward(self):
base_text = ".".join(data.split(".", maxsplit=random_cutoff)[:-1])

# Create a summary task from the context.
-summary_task: Task = create_summarization_task(base_text)
-
-# Reset Blacklist reward model
-self.blacklist.reset()
+summary_task: Task = create_summarization_task(base_text)

# Request a summary, given the original context.
summarization_event = await run_step(
2 changes: 1 addition & 1 deletion prompting/validators/reward/blacklist.py
@@ -302,7 +302,7 @@ def reward(self, prompt: str, completion: str, name: str) -> BlacklistRewardEvent
and fuzz.partial_ratio(ngram, completion.lower())
> self.partial_ratio_boundary
):
-reward_event.reward = 0
+reward_event.reward = 1
reward_event.matched_ngram = ngram
reward_event.significance_score = score
return reward_event
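
For context, here is a minimal sketch of the fuzzy-matching condition in the hunk above. It assumes `fuzz` comes from the rapidfuzz package (thefuzz exposes the same `partial_ratio` API) and uses a made-up threshold in place of `self.partial_ratio_boundary`; it is an illustration, not the validator's code.

import rapidfuzz.fuzz as fuzz

partial_ratio_boundary = 90  # hypothetical threshold, stands in for self.partial_ratio_boundary
ngram = "quick brown fox"
completion = "He said the QUICK BROWN FOX jumps over the lazy dog."

# partial_ratio returns a 0-100 similarity score for the best-matching substring.
score = fuzz.partial_ratio(ngram, completion.lower())
if score > partial_ratio_boundary:
    # With this hotfix a matched completion is scored 1 instead of 0,
    # so the blacklist model no longer penalizes matching completions.
    print(f"matched: partial_ratio={score}")
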
3 changes: 2 additions & 1 deletion prompting/validators/reward/dpo.py
@@ -129,7 +129,8 @@ def reward_single(

# NaNs can possibly arise through log(0)=-inf, replace with suitably small logits.
if torch.isnan(reward) or torch.isinf(reward):
-    reward_event.reward = 11
+    reward_event.reward = -11
+    return reward_event

reward_event.reward = reward.item()
return reward_event
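
As the hunk's own comment notes, NaNs can arise through log(0) = -inf. A short sketch of how that happens when a reward is derived from log-probabilities, and how a small sentinel value guards against it (illustration only, not the repository's scoring code):

import torch

probs = torch.tensor([0.0, 0.5])
logps = torch.log(probs)          # tensor([-inf, -0.6931])
reward = logps[0] - logps[0]      # -inf - (-inf) evaluates to nan

if torch.isnan(reward) or torch.isinf(reward):
    reward = torch.tensor(-11.0)  # replace with a suitably small logit, as the diff above does
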
5 changes: 5 additions & 0 deletions prompting/validators/reward/reward.py
@@ -167,6 +167,11 @@ def apply(
reward_events = {f"{self.name}_{k}": v for k, v in reward_events.items()}
reward_events[self.name] = filled_rewards.tolist()
reward_events[self.name + "_normalized"] = filled_rewards_normalized.tolist()
+
+# Warns unexpected behavior for rewards
+if torch.isnan(filled_rewards_normalized).any():
+    bt.logging.warning(f"The tensor from {self.name} contains NaN values: {filled_rewards_normalized}")
+    filled_rewards_normalized.nan_to_num_(nan=0.0)

# Return the filled rewards.
return filled_rewards_normalized, reward_events
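
A small standalone demonstration of the in-place NaN replacement added above (the tensor values here are made up):

import torch

filled_rewards_normalized = torch.tensor([0.25, float("nan"), 0.75])
if torch.isnan(filled_rewards_normalized).any():
    # nan_to_num_ rewrites the tensor in place; NaNs become 0.0, other values are untouched
    filled_rewards_normalized.nan_to_num_(nan=0.0)
print(filled_rewards_normalized)  # tensor([0.2500, 0.0000, 0.7500])
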
83 changes: 83 additions & 0 deletions tests/validators/reward/test_reward_event.py
@@ -0,0 +1,83 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# Copyright © 2023 Opentensor Foundation

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import unittest
from dataclasses import fields
import prompting.validators.reward as reward

class RewardEventTestCase(unittest.TestCase):
    """
    This class contains unit tests for the RewardEvent classes.
    The tests cover scenarios where completions may or may not be successful and
    check that the resulting reward events contain no missing values.
    The `reward` attribute of all RewardEvents is expected to be a float, and the
    `is_filter_model` attribute is expected to be a boolean.
    """

    def setUp(self):
        self.event_classes = [
            reward.reward.BaseRewardEvent,  # Represents a reward model (float)
            reward.nsfw.NSFWRewardEvent,  # Remaining events are filters
            reward.blacklist.BlacklistRewardEvent,
            reward.relevance.RelevanceRewardEvent,
            reward.diversity.DiversityRewardEvent
        ]

        self.reward_events = {}
        for event in self.event_classes:

            event_type = event.__name__
            self.reward_events[event_type] = []

            # Simulate a batch of completions
            for i in range(50):
                ev = event()

                # Simulate unsuccessful completions by leaving reward event as its default value
                if i % 10 == 0:
                    continue

                for field in fields(ev):
                    # don't modify the is_filter_model field
                    if field.name == 'is_filter_model':
                        continue
                    # otherwise set the field to a float (including reward)
                    setattr(ev, field.name, 1.234)

                self.reward_events[event_type].append(ev)

    def test_no_missing_rewards(self):

        for name, events in self.reward_events.items():

            parsed = reward.reward.BaseRewardEvent.parse_reward_events(events)

            # Ensure that all rewards are not None
            self.assertTrue(all(r is not None for r in parsed['reward']), f'Events for {name} are missing rewards')


    def test_imputed_reward_values_are_correct(self):

        for name, events in self.reward_events.items():

            expected_value = 1 if events[0].is_filter_model else 0
            indices_missing_reward = [i for i, ev in enumerate(events) if ev.reward is None]

            parsed = reward.reward.BaseRewardEvent.parse_reward_events(events)

            # Ensure that missing rewards were imputed with the expected default value
            self.assertTrue(all(parsed['reward'][i] == expected_value for i in indices_missing_reward), f'Events for {name} were imputed with incorrect reward value')
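
For readers unfamiliar with `parse_reward_events`, the tests above pin down its expected behavior: missing (None) rewards are imputed with 1 for filter models and 0 otherwise. The following is a minimal sketch of that contract, inferred from the tests rather than taken from the repository's implementation; the class name and field set are hypothetical stand-ins for `BaseRewardEvent`.

from dataclasses import dataclass, fields
from typing import Optional


@dataclass
class SketchRewardEvent:  # hypothetical stand-in for BaseRewardEvent
    reward: Optional[float] = None
    is_filter_model: bool = False

    @staticmethod
    def parse_reward_events(events):
        # Collect each field across events; impute missing rewards with the
        # model-appropriate default (1 for filters, 0 for scalar reward models).
        default = 1 if events and events[0].is_filter_model else 0
        parsed = {f.name: [] for f in fields(events[0])}
        for ev in events:
            for f in fields(ev):
                value = getattr(ev, f.name)
                if f.name == "reward" and value is None:
                    value = default
                parsed[f.name].append(value)
        return parsed


events = [SketchRewardEvent(), SketchRewardEvent(reward=0.9)]
print(SketchRewardEvent.parse_reward_events(events)["reward"])  # [0, 0.9]
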
