Skip to content

Commit

Permalink
Merge pull request #85 from opentensor/staging
Browse the repository at this point in the history
2.1.5 Release
  • Loading branch information
p-ferreira committed Nov 29, 2023
2 parents b498f12 + 91ed7bf commit 16aaacd
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 28 deletions.
2 changes: 1 addition & 1 deletion neurons/validators/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def __init__(self):
MockRewardModel(RewardModelType.nsfw.value),
]
self.penalty_functions = [
TaskValidationPenaltyModel(max_penalty=0.6),
TaskValidationPenaltyModel(max_penalty=0.75),
ContentMatchPenaltyModel(max_penalty=0.2),
KeywordMatchPenaltyModel(max_penalty=1),
]
Expand Down
2 changes: 1 addition & 1 deletion prompting/validators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from . import event
from . import dataset

__version__ = "2.1.4"
__version__ = "2.1.5"
version_split = __version__.split(".")
__spec_version__ = (
(1000 * int(version_split[0]))
Expand Down
2 changes: 1 addition & 1 deletion prompting/validators/criteria.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,4 +241,4 @@ def evaluate(self, completions: List[str]) -> torch.FloatTensor:
return penalties

def compose_text(self) -> str:
return self.text.format(layout_type=self.layout_type)
return self.text.format(layout_type=self.layout_type.value)
7 changes: 6 additions & 1 deletion prompting/validators/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ async def run_step(self, task: Task, k: int, timeout: float, exclude: list = [])
return event


async def forward(self):
async def questions_and_answers_around_summary_flow(self):
# Obtain a unique context from the dataset.
data = next(self.dataset)["text"]

Expand Down Expand Up @@ -272,3 +272,8 @@ async def forward(self):
)

exclude += qa_event["uids"]


async def forward(self):
# Definition of flow to be executed at forward step
await questions_and_answers_around_summary_flow(self)
8 changes: 4 additions & 4 deletions prompting/validators/reward/blacklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def name(self) -> str:

def __init__(
self,
boundary: float = 6,
boundary: float = 40,
n_min: int = 5,
n_max: int = 14,
n_max: int = 10,
word_limit: int = 2000,
A: float = 1.3,
preprocess: str = "[^(\\w|\\s)]",
Expand Down Expand Up @@ -213,7 +213,7 @@ def calculate_significance(self) -> dict:
if len(decoded_ngram.split()) >= self.n_min:
# calculate significance score for ngram
significance_scores[decoded_ngram] = (
self.A ** (len(decoded_ngram) - 1)
self.A ** (len(decoded_ngram.split()) - 1)
* ((count[0] + count[1]) / self.num_completion)
* self.frequency_multiplier
)
Expand Down Expand Up @@ -302,7 +302,7 @@ def reward(self, prompt: str, completion: str, name: str) -> BlacklistRewardEven
and fuzz.partial_ratio(ngram, completion.lower())
> self.partial_ratio_boundary
):
reward_event.reward = 1
reward_event.reward = 0
reward_event.matched_ngram = ngram
reward_event.significance_score = score
return reward_event
Expand Down
2 changes: 1 addition & 1 deletion prompting/validators/reward/reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def apply(
bt.logging.warning(
f"The tensor from {self.name} contains NaN values: {filled_rewards_normalized}"
)
filled_rewards_normalized.nan_to_num_(nan=0.0)
filled_rewards_normalized = filled_rewards_normalized.nan_to_num_(nan=0.0)

# Return the filled rewards.
return filled_rewards_normalized, reward_events
77 changes: 58 additions & 19 deletions prompting/validators/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
ContentMatchTypeEnum,
SimpleResponseLayoutCriteria,
MatchContentCriteria,
MatchLayoutCriteria,
LayoutMatchTypeEnum,
)


Expand Down Expand Up @@ -122,24 +124,61 @@ def compose_prompt(self) -> str:


def create_summarization_task(base_text: str) -> SummaryTask:
possible_criterias = [
MatchLengthCriteria(
penalty=0.1,
target_length=random.randint(50, 200),
unit=TextLengthUnitEnum.WORDS,
),
MatchLengthCriteria(
penalty=0.1,
target_length=random.randint(4, 8),
unit=TextLengthUnitEnum.SENTENCES,
),
]
# scope 1: bullet points, scope 2: numbered list, scope 3: simple layout
scope = random.randint(1, 3)

select_bullet_point_layout = scope == 1
select_numbered_list_layout = scope == 2

# scope 1 or 2: define criteria set for bullet points or numbered list
if select_bullet_point_layout or select_numbered_list_layout:
if select_bullet_point_layout:
layout_criteria = MatchLayoutCriteria(
layout_type=LayoutMatchTypeEnum.UNORDERED_LIST,
penalty=0.5,
text="Your response should be ordered in format of bullet points.",
)
else:
layout_criteria = MatchLayoutCriteria(
layout_type=LayoutMatchTypeEnum.NUMBERED_LIST,
penalty=0.5,
)

possible_other_criterion = [
MatchLengthCriteria(
penalty=0.25,
target_length=random.randint(100, 200),
unit=TextLengthUnitEnum.WORDS,
),
MatchLengthCriteria(
penalty=0.25,
target_length=random.randint(8, 12),
unit=TextLengthUnitEnum.SENTENCES,
),
]
# scope 3: define criteria set for simple layout
else:
layout_criteria = SimpleResponseLayoutCriteria(penalty=0.5)

possible_other_criterion = [
MatchLengthCriteria(
penalty=0.25,
target_length=random.randint(50, 200),
unit=TextLengthUnitEnum.WORDS,
),
MatchLengthCriteria(
penalty=0.25,
target_length=random.randint(4, 8),
unit=TextLengthUnitEnum.SENTENCES,
),
]

sampled_criterias = random.sample(possible_criterias, 1)
random_sampled_criterion = random.sample(possible_other_criterion, 1)
defined_criteria = [layout_criteria] + random_sampled_criterion

return SummaryTask(
base_text=base_text,
criteria=sampled_criterias,
criteria=defined_criteria,
task_type="summarization",
task_name="augment",
)
Expand Down Expand Up @@ -192,12 +231,12 @@ def create_qg_task(base_text: str, index: int) -> QuestionGenerationTask:

other_random_criteria = [
MatchLengthCriteria(
penalty=0.1,
penalty=0.25,
target_length=random.randint(10, 40),
unit=TextLengthUnitEnum.WORDS,
),
MatchLengthCriteria(
penalty=0.1,
penalty=0.25,
target_length=random.randint(40, 150),
unit=TextLengthUnitEnum.CHARACTERS,
),
Expand All @@ -221,16 +260,16 @@ def create_qa_task(base_text: str, index: int) -> QuestionAnswerTask:
answer_should_not_include_criteria = MatchContentCriteria(
words_array=["?"],
n_words=1,
penalty=0.2,
penalty=0.25,
contentMatchType=ContentMatchTypeEnum.INCLUDES,
negate_match=True,
text="Your response should not include any question marks",
)

simple_response_layout_criteria = SimpleResponseLayoutCriteria(penalty=0.2)
simple_response_layout_criteria = SimpleResponseLayoutCriteria(penalty=0.25)

words_criteria = MatchLengthCriteria(
penalty=0.2,
penalty=0.25,
target_length=random.randint(50, 200),
unit=TextLengthUnitEnum.WORDS,
)
Expand Down

0 comments on commit 16aaacd

Please sign in to comment.