From 8a6b25511d21b306bca8205914bf26047cd547d6 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Mon, 11 Sep 2023 22:48:25 +0000 Subject: [PATCH 1/8] (WIP) port over BTLM miner example --- {miners => baseminer}/__init__.py | 0 {miners => baseminer}/blacklist.py | 0 {miners => baseminer}/config.py | 0 {miners => baseminer}/forward.py | 0 {miners => baseminer}/miner.py | 0 {miners => baseminer}/mock.py | 0 {miners => baseminer}/priority.py | 0 {miners => baseminer}/revolution_miner.py | 43 +++-- {miners => baseminer}/run.py | 0 {miners => baseminer}/set_weights.py | 0 neurons/bittensorLM/README.md | 123 ++++++++++++++ neurons/bittensorLM/bittensorlm.py | 192 ++++++++++++++++++++++ neurons/bittensorLM/requirements.txt | 4 + neurons/miner.py | 34 ---- prompting/protocol.py | 4 + 15 files changed, 349 insertions(+), 51 deletions(-) rename {miners => baseminer}/__init__.py (100%) rename {miners => baseminer}/blacklist.py (100%) rename {miners => baseminer}/config.py (100%) rename {miners => baseminer}/forward.py (100%) rename {miners => baseminer}/miner.py (100%) rename {miners => baseminer}/mock.py (100%) rename {miners => baseminer}/priority.py (100%) rename {miners => baseminer}/revolution_miner.py (91%) rename {miners => baseminer}/run.py (100%) rename {miners => baseminer}/set_weights.py (100%) create mode 100644 neurons/bittensorLM/README.md create mode 100644 neurons/bittensorLM/bittensorlm.py create mode 100644 neurons/bittensorLM/requirements.txt delete mode 100644 neurons/miner.py diff --git a/miners/__init__.py b/baseminer/__init__.py similarity index 100% rename from miners/__init__.py rename to baseminer/__init__.py diff --git a/miners/blacklist.py b/baseminer/blacklist.py similarity index 100% rename from miners/blacklist.py rename to baseminer/blacklist.py diff --git a/miners/config.py b/baseminer/config.py similarity index 100% rename from miners/config.py rename to baseminer/config.py diff --git a/miners/forward.py b/baseminer/forward.py similarity index 100% rename from miners/forward.py rename to baseminer/forward.py diff --git a/miners/miner.py b/baseminer/miner.py similarity index 100% rename from miners/miner.py rename to baseminer/miner.py diff --git a/miners/mock.py b/baseminer/mock.py similarity index 100% rename from miners/mock.py rename to baseminer/mock.py diff --git a/miners/priority.py b/baseminer/priority.py similarity index 100% rename from miners/priority.py rename to baseminer/priority.py diff --git a/miners/revolution_miner.py b/baseminer/revolution_miner.py similarity index 91% rename from miners/revolution_miner.py rename to baseminer/revolution_miner.py index 53063fd..aad71aa 100644 --- a/miners/revolution_miner.py +++ b/baseminer/revolution_miner.py @@ -16,6 +16,7 @@ # DEALINGS IN THE SOFTWARE. import os +import copy import time import wandb import argparse @@ -29,16 +30,17 @@ import bittensor as bt from prompting.protocol import Prompting -from miners.priority import priority -from miners.blacklist import blacklist -from miners.run import run -from miners.set_weights import set_weights -from miners.config import check_config, get_config +from baseminer.priority import priority +from baseminer.blacklist import blacklist +from baseminer.run import run +from baseminer.set_weights import set_weights +from baseminer.config import check_config, get_config + class Miner(ABC): """ The Miner class is an abstract base class that defines the structure for Bittensor miners. - Subclasses should implement the `prompt` method to define their own response logic. 
+ Subclassed should implement the `prompt` method to define their own response logic. The `blacklist` and `priority` methods can also be overridden to provide custom logic. """ @@ -52,8 +54,11 @@ def __init__(self, config=None, axon=None, wallet=None, subtensor=None): wallet: Bittensor Wallet object which holds cryptographic keys. subtensor: Bittensor Subtensor object which manages the blockchain connection. """ - # Setup and check config - self.config = config or get_config() + # Setup base config from Miner.config() and merge with subclassed config. + base_config = copy.deepcopy(config or get_config()) + self.config = self.config() + self.config.merge(base_config) + check_config(Miner, self.config) bt.logging.info(self.config) # TODO: duplicate print? @@ -123,11 +128,20 @@ def __init__(self, config=None, axon=None, wallet=None, subtensor=None): self.request_timestamps: Dict = {} + @abstractmethod + def config(self) -> "bt.Config": + ... + + @classmethod + @abstractmethod + def add_args(cls, parser: argparse.ArgumentParser): + ... + @abstractmethod def prompt(self, synapse: Prompting) -> Prompting: ... - def blacklist(self, synapse: Prompting) -> bool: + def blacklist(self, synapse: Prompting) -> Tuple[bool, str]: """ Default blacklist logic @@ -144,11 +158,9 @@ def blacklist(self, synapse: Prompting) -> bool: Returns: blacklisted (:obj:`bool`): """ - def _blacklist( - synapse: "Prompting" - ) -> Union[Tuple[bool, str], bool]: + def _blacklist(synapse: "Prompting") -> Tuple[bool, str]: raise NotImplementedError("blacklist not implemented in subclass") - + return blacklist(self, _blacklist, synapse) def priority(self, synapse: Prompting) -> float: @@ -169,14 +181,11 @@ def priority(self, synapse: Prompting) -> float: priority (:obj:`float`): """ - def _priority( - synapse: "Prompting" - ) -> Union[Tuple[bool, str], bool]: + def _priority(synapse: "Prompting") -> bool: raise NotImplementedError("priority not implemented in subclass") return priority(self, _priority, synapse) - def run(self): """ Runs the miner logic. This method starts the miner's operations, including diff --git a/miners/run.py b/baseminer/run.py similarity index 100% rename from miners/run.py rename to baseminer/run.py diff --git a/miners/set_weights.py b/baseminer/set_weights.py similarity index 100% rename from miners/set_weights.py rename to baseminer/set_weights.py diff --git a/neurons/bittensorLM/README.md b/neurons/bittensorLM/README.md new file mode 100644 index 0000000..1ae6f5f --- /dev/null +++ b/neurons/bittensorLM/README.md @@ -0,0 +1,123 @@ +## Bittensor LM (BTLM) Miner +Bittensor LM Language Model +This code is for running the very small (3B) Bittensor Language Model created by btlm. 
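The miner entry point in this repository is `neurons/bittensorLM/bittensorlm.py`, which loads `cerebras/btlm-3b-8k-base` and builds on the abstract `Miner` base class in `baseminer/revolution_miner.py`; its Python dependencies are listed in `neurons/bittensorLM/requirements.txt`. As a rough, illustrative sketch of that base class (mirroring `neurons/template.py` and `neurons/bittensorLM/bittensorlm.py` from this change set; the `EchoMiner` name and its argument parser are hypothetical), a minimal miner looks like:
```
# Illustrative sketch only: a minimal subclass of the abstract Miner base class.
import argparse
import bittensor as bt

from baseminer.revolution_miner import Miner
from prompting.protocol import Prompting


class EchoMiner(Miner):
    @classmethod
    def config(cls) -> "bt.Config":
        # Build a bt.Config from this miner's argument parser, as the base class expects.
        parser = argparse.ArgumentParser(description="Echo Miner Config")
        cls.add_args(parser)
        return bt.config(parser)

    @classmethod
    def add_args(cls, parser: argparse.ArgumentParser):
        # This toy miner adds no extra command line arguments.
        pass

    def prompt(self, synapse: Prompting) -> Prompting:
        # Fill the completion field of the incoming synapse and return it.
        synapse.completion = "I am a chatbot"
        return synapse


if __name__ == "__main__":
    EchoMiner().run()
```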
+ +# Install +``` +python3 -m pip install -r openminers/text_to_text/bittensor_lm/requirements.txt +``` + +# Example Usage +``` +python3 openminers/text_to_text/bittensor_lm/miner.py +``` + +# Full Usage +``` +usage: miner.py [-h] [--btlm.device btlm.DEVICE] [--btlm.max_length btlm.MAX_LENGTH] [--btlm.do_sample] + [--btlm.no_repeat_ngram_size btlm.NO_REPEAT_NGRAM_SIZE] [--btlm.use_deepspeed] + [--btlm.do_prompt_injection] [--btlm.system_prompt SYSTEM_PROMPT] [--btlm.use_vanilla_process_history] + [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] + [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] + [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] + [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] + [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] + [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] + [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] + [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] + [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] + [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] + [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] + [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] + [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] + [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] + [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] + [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] + +optional arguments: + -h, --help show this help message and exit + --btlm.device btlm.DEVICE + Device to load model + --btlm.max_length btlm.MAX_LENGTH + The maximum length (in tokens) of the generated text. + --btlm.do_sample Whether to use sampling or not (if not, uses greedy decoding). + --btlm.no_repeat_ngram_size btlm.NO_REPEAT_NGRAM_SIZE + The size of the n-grams to avoid repeating in the generated text. + --btlm.model_size {1.3B,2.7B,6.7B,13B} + Model size to use. + --netuid NETUID Subnet netuid + --neuron.name NEURON.NAME + Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name + --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH + Blocks until the miner sets weights on chain + --neuron.no_set_weights + If True, the model does not set weights. + --neuron.max_batch_size NEURON.MAX_BATCH_SIZE + The maximum batch size for forward requests. + --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN + The maximum sequence length for forward requests. + --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] + To blacklist certain hotkeys + --neuron.blacklist.allow_non_registered + If True, the miner will allow non-registered hotkeys to mine. + --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE + Set default stake for miners. + --neuron.default_priority NEURON.DEFAULT_PRIORITY + Set default priority for miners. 
+ --wallet.name WALLET.NAME + The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) + --wallet.hotkey WALLET.HOTKEY + The name of wallet's hotkey. + --wallet.path WALLET.PATH + The path to your bittensor wallets + --wallet._mock To turn on wallet mocking for testing purposes. + --wallet.reregister WALLET.REREGISTER + Whether to reregister the wallet if it is not already registered. + --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS + maximum number of threads in thread pool + --axon.priority.maxsize AXON.PRIORITY.MAXSIZE + maximum size of tasks in priority queue + --axon.port AXON.PORT + The local port this axon endpoint is bound to. i.e. 8091 + --axon.ip AXON.IP The local ip this axon binds to. ie. [::] + --axon.external_port AXON.EXTERNAL_PORT + The public port this axon broadcasts to the network. i.e. 8091 + --axon.external_ip AXON.EXTERNAL_IP + The external ip this axon broadcasts to the network to. ie. [::] + --axon.max_workers AXON.MAX_WORKERS + The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes + new worker threads to service requests up to this number. + --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS + Maximum number of allowed active connections + --subtensor.network SUBTENSOR.NETWORK + The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- + mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an + entry point node from that network. + --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT + The subtensor endpoint flag. If set, overrides the --network flag. + --subtensor._mock To turn on subtensor mocking for testing purposes. + --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES + Number of processors to use for registration + --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL + The number of nonces to process before checking for next block during registration + --subtensor.register.no_output_in_place, --no_output_in_place + Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. + --subtensor.register.verbose + Whether to ouput the registration statistics verbosely. + --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda + Set flag to use CUDA to register. + --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda + Set flag to not use CUDA for registration + --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] + Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). + --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB + Set the number of Threads Per Block for CUDA. + --logging.debug Turn on bittensor debugging information + --logging.trace Turn on bittensor trace level information + --logging.record_log Turns on logging to file. + --logging.logging_dir LOGGING.LOGGING_DIR + Logging default root directory. + --metagraph._mock To turn on metagraph mocking for testing purposes. + --config CONFIG If set, defaults are overridden by passed file. 
+ --strict If flagged, config will check that only exact arguemnts have been set. + ``` \ No newline at end of file diff --git a/neurons/bittensorLM/bittensorlm.py b/neurons/bittensorLM/bittensorlm.py new file mode 100644 index 0000000..d77be10 --- /dev/null +++ b/neurons/bittensorLM/bittensorlm.py @@ -0,0 +1,192 @@ +# The MIT License (MIT) +# Copyright © 2021 Yuma Rao + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +import os +import time +import argparse +import bittensor as bt +import deepspeed +from typing import List, Dict + +import torch +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + StoppingCriteria, + StoppingCriteriaList, + pipeline, +) + +from baseminer.revolution_miner import Miner +from prompting.protocol import Prompting + +class StopOnTokens(StoppingCriteria): + def __init__(self, stop_token_ids: List[int]): + self.stop_token_ids = stop_token_ids + + def __call__( + self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs + ) -> bool: + for stop_id in self.stop_token_ids: + if input_ids[0][-1] == stop_id: + return True + return False + + +class CerebrasBTLMMiner(Miner): + + @classmethod + def config(cls) -> "bt.Config": + parser = argparse.ArgumentParser(description="Bittensor-LM Miner Config") + cls.add_args(parser) + return bt.config(parser) + + @classmethod + def add_args(cls, parser: argparse.ArgumentParser): + parser.add_argument( + "--btlm.device", type=str, help="Device to load model", default="cuda" + ) + parser.add_argument( + "--btlm.max_length", + type=int, + default=50, + help="The maximum length (in tokens) of the generated text.", + ) + parser.add_argument( + "--btlm.do_sample", + action="store_true", + default=False, + help="Whether to use sampling or not (if not, uses greedy decoding).", + ) + parser.add_argument( + "--btlm.no_repeat_ngram_size", + type=int, + default=2, + help="The size of the n-grams to avoid repeating in the generated text.", + ) + parser.add_argument( + "--btlm.do_prompt_injection", + action="store_true", + default=False, + help='Whether to use a custom "system" prompt instead of the one sent by bittensor.', + ) + parser.add_argument( + "--btlm.system_prompt", + type=str, + help="What prompt to replace the system prompt with", + default="A chat between a curious user and an artificial intelligence assistant.\nThe assistant gives helpful, detailed, and polite answers to the user's questions. 
", + ) + parser.add_argument( + "--btlm.use_deepspeed", + action="store_true", + default=False, + help="Whether to use deepspeed or not (if not, uses vanilla huggingface).", + ) + parser.add_argument( + "--btlm.temperature", type=float, default=0.7, help="Sampling temperature." + ) + + def __init__(self, *args, **kwargs): + super(CerebrasBTLMMiner, self).__init__(*args, **kwargs) + print(self.config) + + bt.logging.info("Loading BTLM 3B model...") + model = AutoModelForCausalLM.from_pretrained( + "cerebras/btlm-3b-8k-base", + trust_remote_code=True, + low_cpu_mem_usage=True, + torch_dtype="auto", + ) + tokenizer = AutoTokenizer.from_pretrained( + "cerebras/btlm-3b-8k-base", + trust_remote_code=True, + ) + self.stop_token_ids = tokenizer.convert_tokens_to_ids(["<|endoftext|>"]) + self.stop = StopOnTokens(self.stop_token_ids) + + # Determine correct device id (int) from device string. + if self.config.btlm.device == "cuda": + self.config.btlm.device = 0 + elif len(self.config.btlm.device.split(":") == 2): + try: + self.config.btlm.device = int(self.config.btlm.device.split(":")[1]) + except: + raise ValueError( + "Invalid device string: {}".format(self.config.btlm.device) + ) + + self.pipe = pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + device=self.config.btlm.device, + do_sample=self.config.btlm.do_sample, + max_new_tokens=self.config.btlm.max_length, + no_repeat_ngram_size=self.config.btlm.no_repeat_ngram_size, + ) + + if self.config.btlm.use_deepspeed: + self.pipe.model = deepspeed.init_inference( + self.pipe.model, + mp_size=int(os.getenv("WORLD_SIZE", "1")), + dtype=torch.float, + replace_with_kernel_inject=False, + ) + + def _process_history(self, history: List[Dict[str, str]]) -> str: + processed_history = "" + if self.config.btlm.do_prompt_injection: + processed_history += self.config.btlm.system_prompt + for message in history: + if message["role"] == "system": + if not self.config.btlm.do_prompt_injection or message != history[0]: + processed_history += "system: " + message["content"] + "\n" + if message["role"] == "assistant": + processed_history += "assistant: " + message["content"] + "\n" + if message["role"] == "user": + processed_history += "user: " + message["content"] + "\n" + return processed_history + + def prompt(self, synapse: Prompting) -> Prompting: + history = self._process_history(messages) + history += "assistant: " + bt.logging.debug("History: {}".format(history)) + generation = ( + self.pipe( + history, + temperature=self.config.btlm.temperature, + max_new_tokens=self.config.btlm.max_length, + no_repeat_ngram_size=self.config.btlm.no_repeat_ngram_size, + do_sample=self.config.btlm.do_sample, + eos_token_id=self.pipe.tokenizer.eos_token_id, + pad_token_id=self.pipe.tokenizer.pad_token_id, + stopping_criteria=StoppingCriteriaList([self.stop]), + )[0]["generated_text"] + .split(":")[-1] + .replace(str(history), "") + ) + bt.logging.debug("Generation: {}".format(generation)) + return generation + + +if __name__ == "__main__": + bt.debug() + miner = CerebrasBTLMMiner() + with miner: + while True: + time.sleep(1) \ No newline at end of file diff --git a/neurons/bittensorLM/requirements.txt b/neurons/bittensorLM/requirements.txt new file mode 100644 index 0000000..7755304 --- /dev/null +++ b/neurons/bittensorLM/requirements.txt @@ -0,0 +1,4 @@ +transformers>=4.28.0, <=4.31.0 +xformers +accelerate +deepspeed \ No newline at end of file diff --git a/neurons/miner.py b/neurons/miner.py deleted file mode 100644 index 021ece8..0000000 --- 
a/neurons/miner.py +++ /dev/null @@ -1,34 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# Bittensor Miner Template:# Step 1: Import necessary libraries and modules - -from miners.revolution_miner import Miner -from prompting.protocol import Prompting -import bittensor as bt - -class TemplateMiner(Miner): - def prompt(self, synapse: Prompting) -> Prompting: - bt.logging.debug("In prompt!") - synapse.completion = "I am a chatbot" - return synapse - -# This is the main function, which runs the miner. -if __name__ == "__main__": - TemplateMiner().run() diff --git a/prompting/protocol.py b/prompting/protocol.py index b691e18..e6d1352 100644 --- a/prompting/protocol.py +++ b/prompting/protocol.py @@ -31,6 +31,10 @@ class Config: def deserialize(self): return self + # @property + # def required_hash_fields(self) -> List[str]: + # return ['messages'] + roles: List[str] = pydantic.Field(..., allow_mutation=False) messages: List[str] = pydantic.Field(..., allow_mutation=False) completion: str = '' From 11b5e0e2a705b8785b591575ba0971ae24e6b815 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Tue, 12 Sep 2023 01:08:26 +0000 Subject: [PATCH 2/8] fix btlm forward -> prompt, black format --- baseminer/blacklist.py | 4 +- baseminer/priority.py | 5 +- baseminer/revolution_miner.py | 3 +- baseminer/run.py | 2 +- neurons/bittensorLM/bittensorlm.py | 32 ++-- neurons/template.py | 36 +++++ neurons/validator.py | 86 ++++++++--- prompting/protocol.py | 2 +- prompting/validators/__init__.py | 6 +- prompting/validators/config.py | 22 ++- prompting/validators/dataset.py | 27 ++-- prompting/validators/event.py | 137 ++++++++++++------ prompting/validators/forward.py | 79 ++++++---- prompting/validators/gating.py | 42 ++++-- prompting/validators/misc.py | 2 + prompting/validators/mock.py | 16 +- prompting/validators/prompts.py | 41 ++++-- prompting/validators/reward/__init__.py | 2 +- prompting/validators/reward/blacklist.py | 31 ++-- prompting/validators/reward/config.py | 21 +-- prompting/validators/reward/dahoas.py | 63 ++++---- prompting/validators/reward/diversity.py | 100 +++++++------ prompting/validators/reward/dpo.py | 100 ++++++++----- prompting/validators/reward/nsfw.py | 55 ++++--- prompting/validators/reward/open_assistant.py | 39 +++-- prompting/validators/reward/prompt.py | 58 +++++--- prompting/validators/reward/reciprocate.py | 50 ++++--- prompting/validators/reward/relevance.py | 91 
+++++++----- prompting/validators/reward/reward.py | 97 ++++++++----- prompting/validators/reward/task_validator.py | 63 +++++--- prompting/validators/utils.py | 77 ++++++---- prompting/validators/weights.py | 10 +- 32 files changed, 903 insertions(+), 496 deletions(-) create mode 100644 neurons/template.py diff --git a/baseminer/blacklist.py b/baseminer/blacklist.py index d05577c..6ead4d7 100644 --- a/baseminer/blacklist.py +++ b/baseminer/blacklist.py @@ -51,9 +51,7 @@ def is_prompt_in_cache(self, synapse: Prompting) -> bool: return should_blacklist -def default_blacklist( - self, synapse: Prompting -) -> Union[Tuple[bool, str], bool]: +def default_blacklist(self, synapse: Prompting) -> Union[Tuple[bool, str], bool]: # Check if the key is white listed. if synapse.dendrite.hotkey in self.config.miner.blacklist.whitelist: return False, "whitelisted hotkey" diff --git a/baseminer/priority.py b/baseminer/priority.py index c7a6c75..1e7b133 100644 --- a/baseminer/priority.py +++ b/baseminer/priority.py @@ -21,6 +21,7 @@ from typing import List, Dict, Union, Tuple, Callable from prompting.protocol import Prompting + def record_request_timestamps(self, synapse: Prompting): timestamp_length = self.config.miner.priority.len_request_timestamps if synapse.dendrite.hotkey not in self.request_timestamps: @@ -64,9 +65,7 @@ def default_priority(self, synapse: Prompting) -> float: return priority -def priority( - self, func: Callable, synapse: Prompting -) -> float: +def priority(self, func: Callable, synapse: Prompting) -> float: # Check to see if the subclass has implemented a priority function. priority = None try: diff --git a/baseminer/revolution_miner.py b/baseminer/revolution_miner.py index aad71aa..e03f06a 100644 --- a/baseminer/revolution_miner.py +++ b/baseminer/revolution_miner.py @@ -158,6 +158,7 @@ def blacklist(self, synapse: Prompting) -> Tuple[bool, str]: Returns: blacklisted (:obj:`bool`): """ + def _blacklist(synapse: "Prompting") -> Tuple[bool, str]: raise NotImplementedError("blacklist not implemented in subclass") @@ -183,7 +184,7 @@ def priority(self, synapse: Prompting) -> float: def _priority(synapse: "Prompting") -> bool: raise NotImplementedError("priority not implemented in subclass") - + return priority(self, _priority, synapse) def run(self): diff --git a/baseminer/run.py b/baseminer/run.py index cc3f572..f24fdaf 100644 --- a/baseminer/run.py +++ b/baseminer/run.py @@ -134,7 +134,7 @@ def run(self): self.axon.stop() bt.logging.success("Miner killed by keyboard interrupt.") exit() - + # In case of unforeseen errors, the miner will log the error and continue operations. 
except Exception as e: bt.logging.error(traceback.format_exc()) diff --git a/neurons/bittensorLM/bittensorlm.py b/neurons/bittensorLM/bittensorlm.py index d77be10..1c95409 100644 --- a/neurons/bittensorLM/bittensorlm.py +++ b/neurons/bittensorLM/bittensorlm.py @@ -35,6 +35,7 @@ from baseminer.revolution_miner import Miner from prompting.protocol import Prompting + class StopOnTokens(StoppingCriteria): def __init__(self, stop_token_ids: List[int]): self.stop_token_ids = stop_token_ids @@ -49,7 +50,6 @@ def __call__( class CerebrasBTLMMiner(Miner): - @classmethod def config(cls) -> "bt.Config": parser = argparse.ArgumentParser(description="Bittensor-LM Miner Config") @@ -64,7 +64,7 @@ def add_args(cls, parser: argparse.ArgumentParser): parser.add_argument( "--btlm.max_length", type=int, - default=50, + default=100, help="The maximum length (in tokens) of the generated text.", ) parser.add_argument( @@ -103,7 +103,6 @@ def add_args(cls, parser: argparse.ArgumentParser): def __init__(self, *args, **kwargs): super(CerebrasBTLMMiner, self).__init__(*args, **kwargs) - print(self.config) bt.logging.info("Loading BTLM 3B model...") model = AutoModelForCausalLM.from_pretrained( @@ -148,25 +147,25 @@ def __init__(self, *args, **kwargs): replace_with_kernel_inject=False, ) - def _process_history(self, history: List[Dict[str, str]]) -> str: + def _process_history(self, roles: List[str], messages: List[str]) -> str: processed_history = "" if self.config.btlm.do_prompt_injection: processed_history += self.config.btlm.system_prompt - for message in history: - if message["role"] == "system": + for role, message in zip(roles, messages): + if role == "system": if not self.config.btlm.do_prompt_injection or message != history[0]: - processed_history += "system: " + message["content"] + "\n" - if message["role"] == "assistant": - processed_history += "assistant: " + message["content"] + "\n" - if message["role"] == "user": - processed_history += "user: " + message["content"] + "\n" + processed_history += "system: " + message + "\n" + if role == "assistant": + processed_history += "assistant: " + message + "\n" + if role == "user": + processed_history += "user: " + message + "\n" return processed_history def prompt(self, synapse: Prompting) -> Prompting: - history = self._process_history(messages) + history = self._process_history(roles=synapse.roles, messages=synapse.messages) history += "assistant: " bt.logging.debug("History: {}".format(history)) - generation = ( + completion = ( self.pipe( history, temperature=self.config.btlm.temperature, @@ -180,8 +179,9 @@ def prompt(self, synapse: Prompting) -> Prompting: .split(":")[-1] .replace(str(history), "") ) - bt.logging.debug("Generation: {}".format(generation)) - return generation + bt.logging.debug("Completion: {}".format(completion)) + synapse.completion = completion + return synapse if __name__ == "__main__": @@ -189,4 +189,4 @@ def prompt(self, synapse: Prompting) -> Prompting: miner = CerebrasBTLMMiner() with miner: while True: - time.sleep(1) \ No newline at end of file + time.sleep(1) diff --git a/neurons/template.py b/neurons/template.py new file mode 100644 index 0000000..a192d73 --- /dev/null +++ b/neurons/template.py @@ -0,0 +1,36 @@ +# The MIT License (MIT) +# Copyright © 2023 Yuma Rao +# TODO(developer): Set your name +# Copyright © 2023 + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including 
without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Bittensor Miner Template:# Step 1: Import necessary libraries and modules + +from miners.revolution_miner import Miner +from prompting.protocol import Prompting +import bittensor as bt + + +class TemplateMiner(Miner): + def prompt(self, synapse: Prompting) -> Prompting: + bt.logging.debug("In prompt!") + synapse.completion = "I am a chatbot" + return synapse + + +# This is the main function, which runs the miner. +if __name__ == "__main__": + TemplateMiner().run() diff --git a/neurons/validator.py b/neurons/validator.py index 8ff26df..a76c5e7 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -30,7 +30,15 @@ # Load local forward function. from prompting.validators.config import add_args, check_config, config from prompting.validators.forward import forward -from prompting.validators.utils import should_checkpoint, checkpoint, should_reinit_wandb, reinit_wandb, load_state, save_state, init_wandb +from prompting.validators.utils import ( + should_checkpoint, + checkpoint, + should_reinit_wandb, + reinit_wandb, + load_state, + save_state, + init_wandb, +) from prompting.validators.weights import should_set_weights, set_weights from prompting.validators.misc import ttl_get_block @@ -50,6 +58,7 @@ RewardModelType, ) + class neuron: @classmethod def check_config(cls, config: "bt.Config"): @@ -89,15 +98,21 @@ def __init__(self): self.wallet = bt.wallet(config=self.config) self.wallet.create_if_non_existent() if not self.config.wallet._mock: - if not self.subtensor.is_hotkey_registered_on_subnet(hotkey_ss58=self.wallet.hotkey.ss58_address, netuid=self.config.netuid): - raise Exception(f'Wallet not currently registered on netuid {self.config.netuid}, please first register wallet before running') - + if not self.subtensor.is_hotkey_registered_on_subnet( + hotkey_ss58=self.wallet.hotkey.ss58_address, netuid=self.config.netuid + ): + raise Exception( + f"Wallet not currently registered on netuid {self.config.netuid}, please first register wallet before running" + ) + bt.logging.debug(str(self.wallet)) # Init metagraph. bt.logging.debug("loading", "metagraph") - self.metagraph = bt.metagraph(netuid=self.config.netuid, network=self.subtensor.network, sync=False) # Make sure not to sync without passing subtensor - self.metagraph.sync(subtensor=self.subtensor) # Sync metagraph with subtensor. + self.metagraph = bt.metagraph( + netuid=self.config.netuid, network=self.subtensor.network, sync=False + ) # Make sure not to sync without passing subtensor + self.metagraph.sync(subtensor=self.subtensor) # Sync metagraph with subtensor. 
self.hotkeys = copy.deepcopy(self.metagraph.hotkeys) bt.logging.debug(str(self.metagraph)) @@ -122,16 +137,20 @@ def __init__(self): if self.config.neuron.mock_gating_model: self.gating_model = MockGatingModel(self.metagraph.n.item()) elif self.config.neuron.use_custom_gating_model: - self.gating_model = SentenceEmbedGatingModel(metagraph=self.metagraph, config=self.config).to(self.device) + self.gating_model = SentenceEmbedGatingModel( + metagraph=self.metagraph, config=self.config + ).to(self.device) else: - self.gating_model = GatingModel(metagraph=self.metagraph, config=self.config).to(self.device) + self.gating_model = GatingModel( + metagraph=self.metagraph, config=self.config + ).to(self.device) bt.logging.debug(str(self.gating_model)) if not self.config.neuron.axon_off: - bt.logging.debug('serving ip to chain...') + bt.logging.debug("serving ip to chain...") try: - axon = bt.axon( - wallet=self.wallet, metagraph=self.metagraph, config=self.config + axon = bt.axon( + wallet=self.wallet, metagraph=self.metagraph, config=self.config ) try: @@ -142,23 +161,25 @@ def __init__(self): wait_for_finalization=True, ) except Exception as e: - bt.logging.error(f'Failed to serve Axon with exception: {e}') + bt.logging.error(f"Failed to serve Axon with exception: {e}") pass del axon except Exception as e: - bt.logging.error(f'Failed to create Axon initialize with exception: {e}') + bt.logging.error( + f"Failed to create Axon initialize with exception: {e}" + ) pass else: - bt.logging.debug('axon off, not serving ip to chain.') + bt.logging.debug("axon off, not serving ip to chain.") # Dendrite pool for querying the network during training. bt.logging.debug("loading", "dendrite_pool") if self.config.neuron.mock_dendrite_pool: self.dendrite = MockDendrite() else: - self.dendrite = bt.dendrite( wallet = self.wallet ) + self.dendrite = bt.dendrite(wallet=self.wallet) bt.logging.debug(str(self.dendrite)) # Init Reward model @@ -219,29 +240,41 @@ def __init__(self): bt.logging.error(message) raise Exception(message) - + # Masking functions self.blacklist = ( - Blacklist() if not self.config.neuron.blacklist_off else MockRewardModel(RewardModelType.blacklist.value) + Blacklist() + if not self.config.neuron.blacklist_off + else MockRewardModel(RewardModelType.blacklist.value) ) task_validator = ( - TaskValidator() if not self.config.neuron.task_validator_off + TaskValidator() + if not self.config.neuron.task_validator_off else MockRewardModel(RewardModelType.task_validator.value) ) relevance_model = ( - RelevanceRewardModel(device=self.device) if not self.config.neuron.relevance_off + RelevanceRewardModel(device=self.device) + if not self.config.neuron.relevance_off else MockRewardModel(RewardModelType.relevance.value) ) self.diversity_model = ( - DiversityRewardModel(device=self.device) if not self.config.neuron.diversity_off + DiversityRewardModel(device=self.device) + if not self.config.neuron.diversity_off else MockRewardModel(RewardModelType.diversity.value) ) nsfw_model = ( - NSFWRewardModel(device=self.device) if not self.config.neuron.nsfw_off - else MockRewardModel(RewardModelType.nsfw.value) + NSFWRewardModel(device=self.device) + if not self.config.neuron.nsfw_off + else MockRewardModel(RewardModelType.nsfw.value) ) - self.masking_functions = [self.blacklist, task_validator, relevance_model, self.diversity_model, nsfw_model] + self.masking_functions = [ + self.blacklist, + task_validator, + relevance_model, + self.diversity_model, + nsfw_model, + ] bt.logging.debug(str(self.reward_functions)) 
bt.logging.debug(str(self.masking_functions)) @@ -271,7 +304,10 @@ def run(self): # Run multiple forwards. async def run_forward(): - coroutines = [forward(self) for _ in range(self.config.neuron.num_concurrent_forwards)] + coroutines = [ + forward(self) + for _ in range(self.config.neuron.num_concurrent_forwards) + ] await asyncio.gather(*coroutines) self.loop.run_until_complete(run_forward()) @@ -296,8 +332,10 @@ async def run_forward(): bt.logging.error("Error in training loop", str(e)) bt.logging.debug(print_exception(value=e)) + def main(): neuron().run() + if __name__ == "__main__": main() diff --git a/prompting/protocol.py b/prompting/protocol.py index e6d1352..eb2b79d 100644 --- a/prompting/protocol.py +++ b/prompting/protocol.py @@ -37,4 +37,4 @@ def deserialize(self): roles: List[str] = pydantic.Field(..., allow_mutation=False) messages: List[str] = pydantic.Field(..., allow_mutation=False) - completion: str = '' + completion: str = "" diff --git a/prompting/validators/__init__.py b/prompting/validators/__init__.py index 1cd8d5c..e21e9c6 100644 --- a/prompting/validators/__init__.py +++ b/prompting/validators/__init__.py @@ -28,4 +28,8 @@ __version__ = "1.2.0" version_split = __version__.split(".") -__spec_version__ = (1000 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2])) +__spec_version__ = ( + (1000 * int(version_split[0])) + + (10 * int(version_split[1])) + + (1 * int(version_split[2])) +) diff --git a/prompting/validators/config.py b/prompting/validators/config.py index e334d33..7e69e5d 100644 --- a/prompting/validators/config.py +++ b/prompting/validators/config.py @@ -27,8 +27,8 @@ def check_config(cls, config: "bt.Config"): r"""Checks/validates the config namespace object.""" bt.logging.check_config(config) - #bt.wallet.check_config(config) - #bt.subtensor.check_config(config) + # bt.wallet.check_config(config) + # bt.subtensor.check_config(config) if config.mock: config.neuron.mock_reward_models = True @@ -66,7 +66,9 @@ def check_config(cls, config: "bt.Config"): def add_args(cls, parser): # Netuid Arg - parser.add_argument("--netuid", type=int, help="Prompting network netuid", default=1) + parser.add_argument( + "--netuid", type=int, help="Prompting network netuid", default=1 + ) parser.add_argument( "--neuron.name", @@ -119,7 +121,9 @@ def add_args(cls, parser): default=50, ) - parser.add_argument("--neuron.answer_timeout", type=float, help="Answer query timeout.", default=10) + parser.add_argument( + "--neuron.answer_timeout", type=float, help="Answer query timeout.", default=10 + ) parser.add_argument( "--neuron.answer_sample_size", type=int, @@ -175,7 +179,9 @@ def add_args(cls, parser): default=True, ) - parser.add_argument("--wandb.off", action="store_true", help="Turn off wandb.", default=False) + parser.add_argument( + "--wandb.off", action="store_true", help="Turn off wandb.", default=False + ) parser.add_argument( "--wandb.project_name", type=str, @@ -220,7 +226,9 @@ def add_args(cls, parser): ) # Mocks - parser.add_argument("--mock", action="store_true", help="Mock all items.", default=False) + parser.add_argument( + "--mock", action="store_true", help="Mock all items.", default=False + ) parser.add_argument( "--neuron.mock_reward_models", action="store_true", @@ -256,7 +264,7 @@ def add_args(cls, parser): action="store_true", help="Dont apply the task validator reward model", default=False, - ) + ) parser.add_argument( "--reward.reciprocate_weight", diff --git a/prompting/validators/dataset.py 
b/prompting/validators/dataset.py index 96683c4..301305b 100644 --- a/prompting/validators/dataset.py +++ b/prompting/validators/dataset.py @@ -20,16 +20,28 @@ from datasets import load_dataset from collections.abc import Iterator + class Dataset(Iterator): def __init__(self): super().__init__() - seed = random.randint(0,1000) - self.openwebtext = iter( load_dataset("openwebtext", split="train", streaming=True).shuffle(seed=seed, buffer_size=10000) ) - self.red_pajama = iter( load_dataset("togethercomputer/RedPajama-Data-1T", 'default', split='train', streaming=True).shuffle(seed=seed, buffer_size=10000) ) + seed = random.randint(0, 1000) + self.openwebtext = iter( + load_dataset("openwebtext", split="train", streaming=True).shuffle( + seed=seed, buffer_size=10000 + ) + ) + self.red_pajama = iter( + load_dataset( + "togethercomputer/RedPajama-Data-1T", + "default", + split="train", + streaming=True, + ).shuffle(seed=seed, buffer_size=10000) + ) - def __next__(self): - while True: - bt.logging.debug('Retrieving data from dataset...') + def __next__(self): + while True: + bt.logging.debug("Retrieving data from dataset...") if random.random() < 0.5: text = next(self.openwebtext)["text"] else: @@ -43,6 +55,3 @@ def __next__(self): class MockDataset(Iterator): def __next__(self): return {"text": "What is the capital of Texas?"} - - - \ No newline at end of file diff --git a/prompting/validators/event.py b/prompting/validators/event.py index b556539..c054de5 100644 --- a/prompting/validators/event.py +++ b/prompting/validators/event.py @@ -36,73 +36,120 @@ class EventSchema: # Reward data rewards: List[float] # Reward vector for given step - dahoas_reward_model: Optional[List[float]] # Output vector of the dahoas reward model + dahoas_reward_model: Optional[ + List[float] + ] # Output vector of the dahoas reward model blacklist_filter: Optional[List[float]] # Output vector of the blacklist filter nsfw_filter: Optional[List[float]] # Output vector of the nsfw filter - reciprocate_reward_model: Optional[List[float]] # Output vector of the reciprocate reward model - diversity_reward_model: Optional[List[float]] # Output vector of the diversity reward model + reciprocate_reward_model: Optional[ + List[float] + ] # Output vector of the reciprocate reward model + diversity_reward_model: Optional[ + List[float] + ] # Output vector of the diversity reward model dpo_reward_model: Optional[List[float]] # Output vector of the dpo reward model rlhf_reward_model: Optional[List[float]] # Output vector of the rlhf reward model - prompt_reward_model: Optional[List[float]] # Output vector of the prompt reward model - relevance_filter: Optional[List[float]] # Output vector of the relevance scoring reward model + prompt_reward_model: Optional[ + List[float] + ] # Output vector of the prompt reward model + relevance_filter: Optional[ + List[float] + ] # Output vector of the relevance scoring reward model task_validator_filter: Optional[List[float]] - dahoas_reward_model_normalized: Optional[List[float]] # Output vector of the dahoas reward model + dahoas_reward_model_normalized: Optional[ + List[float] + ] # Output vector of the dahoas reward model nsfw_filter_normalized: Optional[List[float]] # Output vector of the nsfw filter - reciprocate_reward_model_normalized: Optional[List[float]] # Output vector of the reciprocate reward model - diversity_reward_model_normalized: Optional[List[float]] # Output vector of the diversity reward model - dpo_reward_model_normalized: Optional[List[float]] # Output vector of the 
dpo reward model - rlhf_reward_model_normalized: Optional[List[float]] # Output vector of the rlhf reward model - prompt_reward_model_normalized: Optional[List[float]] # Output vector of the prompt reward model - relevance_filter_normalized: Optional[List[float]] # Output vector of the relevance scoring reward model + reciprocate_reward_model_normalized: Optional[ + List[float] + ] # Output vector of the reciprocate reward model + diversity_reward_model_normalized: Optional[ + List[float] + ] # Output vector of the diversity reward model + dpo_reward_model_normalized: Optional[ + List[float] + ] # Output vector of the dpo reward model + rlhf_reward_model_normalized: Optional[ + List[float] + ] # Output vector of the rlhf reward model + prompt_reward_model_normalized: Optional[ + List[float] + ] # Output vector of the prompt reward model + relevance_filter_normalized: Optional[ + List[float] + ] # Output vector of the relevance scoring reward model task_validator_filter_normalized: Optional[List[float]] - + # Weights data set_weights: Optional[List[List[float]]] @staticmethod - def from_dict(event_dict: dict, disable_log_rewards: bool) -> 'EventSchema': + def from_dict(event_dict: dict, disable_log_rewards: bool) -> "EventSchema": """Converts a dictionary to an EventSchema object.""" rewards = { - 'blacklist_filter': event_dict.get(RewardModelType.blacklist.value), - 'dahoas_reward_model': event_dict.get(RewardModelType.dahoas.value), - 'task_validator_filter': event_dict.get(RewardModelType.task_validator.value), - 'nsfw_filter': event_dict.get(RewardModelType.nsfw.value), - 'relevance_filter': event_dict.get(RewardModelType.relevance.value), - 'reciprocate_reward_model': event_dict.get(RewardModelType.reciprocate.value), - 'diversity_reward_model': event_dict.get(RewardModelType.diversity.value), - 'dpo_reward_model': event_dict.get(RewardModelType.dpo.value), - 'rlhf_reward_model': event_dict.get(RewardModelType.rlhf.value), - 'prompt_reward_model': event_dict.get(RewardModelType.prompt.value), - - 'dahoas_reward_model_normalized': event_dict.get(RewardModelType.dahoas.value + '_normalized'), - 'task_validator_filter_normalized': event_dict.get(RewardModelType.task_validator.value + '_normalized'), - 'nsfw_filter_normalized': event_dict.get(RewardModelType.nsfw.value + '_normalized'), - 'relevance_filter_normalized': event_dict.get(RewardModelType.relevance.value + '_normalized'), - 'reciprocate_reward_model_normalized': event_dict.get(RewardModelType.reciprocate.value + '_normalized'), - 'diversity_reward_model_normalized': event_dict.get(RewardModelType.diversity.value + '_normalized'), - 'dpo_reward_model_normalized': event_dict.get(RewardModelType.dpo.value + '_normalized'), - 'rlhf_reward_model_normalized': event_dict.get(RewardModelType.rlhf.value + '_normalized'), - 'prompt_reward_model_normalized': event_dict.get(RewardModelType.prompt.value + '_normalized'), + "blacklist_filter": event_dict.get(RewardModelType.blacklist.value), + "dahoas_reward_model": event_dict.get(RewardModelType.dahoas.value), + "task_validator_filter": event_dict.get( + RewardModelType.task_validator.value + ), + "nsfw_filter": event_dict.get(RewardModelType.nsfw.value), + "relevance_filter": event_dict.get(RewardModelType.relevance.value), + "reciprocate_reward_model": event_dict.get( + RewardModelType.reciprocate.value + ), + "diversity_reward_model": event_dict.get(RewardModelType.diversity.value), + "dpo_reward_model": event_dict.get(RewardModelType.dpo.value), + "rlhf_reward_model": 
event_dict.get(RewardModelType.rlhf.value), + "prompt_reward_model": event_dict.get(RewardModelType.prompt.value), + "dahoas_reward_model_normalized": event_dict.get( + RewardModelType.dahoas.value + "_normalized" + ), + "task_validator_filter_normalized": event_dict.get( + RewardModelType.task_validator.value + "_normalized" + ), + "nsfw_filter_normalized": event_dict.get( + RewardModelType.nsfw.value + "_normalized" + ), + "relevance_filter_normalized": event_dict.get( + RewardModelType.relevance.value + "_normalized" + ), + "reciprocate_reward_model_normalized": event_dict.get( + RewardModelType.reciprocate.value + "_normalized" + ), + "diversity_reward_model_normalized": event_dict.get( + RewardModelType.diversity.value + "_normalized" + ), + "dpo_reward_model_normalized": event_dict.get( + RewardModelType.dpo.value + "_normalized" + ), + "rlhf_reward_model_normalized": event_dict.get( + RewardModelType.rlhf.value + "_normalized" + ), + "prompt_reward_model_normalized": event_dict.get( + RewardModelType.prompt.value + "_normalized" + ), } # Logs warning that expected data was not set properly if not disable_log_rewards and any(value is None for value in rewards.values()): for key, value in rewards.items(): if value is None: - bt.logging.warning(f'EventSchema.from_dict: {key} is None, data will not be logged') + bt.logging.warning( + f"EventSchema.from_dict: {key} is None, data will not be logged" + ) return EventSchema( - completions=event_dict['completions'], - completion_times=event_dict['completion_times'], - name=event_dict['name'], - block=event_dict['block'], - gating_loss=event_dict['gating_loss'], - uids=event_dict['uids'], - prompt=event_dict['prompt'], - step_length=event_dict['step_length'], - best=event_dict['best'], - rewards=event_dict['rewards'], + completions=event_dict["completions"], + completion_times=event_dict["completion_times"], + name=event_dict["name"], + block=event_dict["block"], + gating_loss=event_dict["gating_loss"], + uids=event_dict["uids"], + prompt=event_dict["prompt"], + step_length=event_dict["step_length"], + best=event_dict["best"], + rewards=event_dict["rewards"], **rewards, set_weights=None, ) diff --git a/prompting/validators/forward.py b/prompting/validators/forward.py index 4c6f4bb..37f1308 100644 --- a/prompting/validators/forward.py +++ b/prompting/validators/forward.py @@ -45,9 +45,11 @@ def get_random_uids(self, k: int, exclude: List[int] = None) -> torch.LongTensor """ candidate_uids = [] avail_uids = [] - + for uid in range(self.metagraph.n.item()): - uid_is_available = check_uid_availability(self.metagraph, uid, self.config.neuron.vpermit_tao_limit) + uid_is_available = check_uid_availability( + self.metagraph, uid, self.config.neuron.vpermit_tao_limit + ) uid_is_not_excluded = exclude is None or uid not in exclude if uid_is_available: @@ -58,13 +60,23 @@ def get_random_uids(self, k: int, exclude: List[int] = None) -> torch.LongTensor # Check if candidate_uids contain enough for querying, if not grab all avaliable uids available_uids = candidate_uids if len(candidate_uids) < k: - available_uids += random.sample([uid for uid in avail_uids if uid not in candidate_uids], k-len(candidate_uids)) + available_uids += random.sample( + [uid for uid in avail_uids if uid not in candidate_uids], + k - len(candidate_uids), + ) uids = torch.tensor(random.sample(available_uids, k)) return uids -async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude: list = [], base_prompt = None): - +async def run_step( + self, + prompt: str, 
+ k: int, + timeout: float, + name: str, + exclude: list = [], + base_prompt=None, +): if base_prompt == None: base_prompt = prompt @@ -74,12 +86,9 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude event = {"name": name} start_time = time.time() # Get the list of uids to query for this step. - uids = get_random_uids(self, k=k, exclude=exclude).to(self.device) + uids = get_random_uids(self, k=10, exclude=exclude).to(self.device) axons = [self.metagraph.axons[uid] for uid in uids] - synapse = prompting.protocol.Prompting( - roles = ['user'], - messages = [prompt] - ) + synapse = prompting.protocol.Prompting(roles=["user"], messages=[prompt]) # Make calls to the network with the prompt. responses: List[bt.Synapse] = await self.dendrite( @@ -89,13 +98,15 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude ) # Compute the rewards for the responses given the prompt. - rewards: torch.FloatTensor = torch.zeros(len(responses), dtype=torch.float32).to(self.device) + rewards: torch.FloatTensor = torch.zeros(len(responses), dtype=torch.float32).to( + self.device + ) for weight_i, reward_fn_i in zip(self.reward_weights, self.reward_functions): reward_i, reward_i_normalized = reward_fn_i.apply(prompt, responses, name) rewards += weight_i * reward_i_normalized.to(self.device) if not self.config.neuron.disable_log_rewards: event[reward_fn_i.name] = reward_i.tolist() - event[reward_fn_i.name + '_normalized'] = reward_i_normalized.tolist() + event[reward_fn_i.name + "_normalized"] = reward_i_normalized.tolist() bt.logging.trace(str(reward_fn_i.name), reward_i_normalized.tolist()) for masking_fn_i in self.masking_functions: @@ -103,30 +114,37 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude rewards *= mask_i_normalized.to(self.device) # includes diversity if not self.config.neuron.disable_log_rewards: event[masking_fn_i.name] = mask_i.tolist() - event[masking_fn_i.name + '_normalized'] = mask_i_normalized.tolist() + event[masking_fn_i.name + "_normalized"] = mask_i_normalized.tolist() bt.logging.trace(str(masking_fn_i.name), mask_i_normalized.tolist()) # Train the gating model based on the predicted scores and the actual rewards. gating_scores: torch.FloatTensor = self.gating_model(prompt).to(self.device) - gating_loss: torch.FloatTensor = self.gating_model.backward(scores=gating_scores[uids], rewards=rewards) + gating_loss: torch.FloatTensor = self.gating_model.backward( + scores=gating_scores[uids], rewards=rewards + ) # Find the best completion given the rewards vector. completions: List[str] = [comp.completion for comp in responses] best: str = completions[rewards.argmax(dim=0)].strip() # Get completion times - completion_times: List[float] = [comp.dendrite.process_time if comp.dendrite.process_time != None else 0 for comp in responses] + completion_times: List[float] = [ + comp.dendrite.process_time if comp.dendrite.process_time != None else 0 + for comp in responses + ] # Compute forward pass rewards, assumes followup_uids and answer_uids are mutually exclusive. # shape: [ metagraph.n ] - scattered_rewards: torch.FloatTensor = self.moving_averaged_scores.scatter(0, uids, rewards).to(self.device) + scattered_rewards: torch.FloatTensor = self.moving_averaged_scores.scatter( + 0, uids, rewards + ).to(self.device) # Update moving_averaged_scores with rewards produced by this step. 
# shape: [ metagraph.n ] alpha: float = self.config.neuron.moving_average_alpha - self.moving_averaged_scores: torch.FloatTensor = alpha * scattered_rewards + (1 - alpha) * self.moving_averaged_scores.to( - self.device - ) + self.moving_averaged_scores: torch.FloatTensor = alpha * scattered_rewards + ( + 1 - alpha + ) * self.moving_averaged_scores.to(self.device) # Log the step event. event.update( @@ -136,7 +154,7 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude "prompt": prompt, "uids": uids.tolist(), "completions": completions, - "completion_times":completion_times, + "completion_times": completion_times, "rewards": rewards.tolist(), "gating_loss": gating_loss.item(), "best": best, @@ -157,7 +175,6 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude async def forward(self): - # Obtain a unique context from the dataset. data = next(self.dataset)["text"] @@ -183,7 +200,6 @@ async def forward(self): exclude = augment_event["uids"] for k in range(self.config.neuron.num_followup_steps): - # Get a followup question, given the summarized context. prompt = followup_prompt(base_text, i=k) followup_event = await run_step( @@ -193,7 +209,7 @@ async def forward(self): k=self.config.neuron.followup_sample_size, timeout=self.config.neuron.followup_timeout, exclude=exclude, - base_prompt=base_prompt + base_prompt=base_prompt, ) exclude += followup_event["uids"] @@ -206,7 +222,7 @@ async def forward(self): k=self.config.neuron.answer_sample_size, timeout=self.config.neuron.answer_timeout, exclude=exclude, - base_prompt=followup_event["best"] + base_prompt=followup_event["best"], ) exclude += answer_event["uids"] @@ -216,8 +232,17 @@ async def forward(self): if k == 0: # Extend the base text with the best answer. 
base_text = ( - base_text + "\nPrevious Question \nQuestion:" + followup_event["best"] + "\nAnswer:" + answer_event["best"] + base_text + + "\nPrevious Question \nQuestion:" + + followup_event["best"] + + "\nAnswer:" + + answer_event["best"] ) else: - base_text = base_text + "\nQuestion:" + followup_event["best"] + "\nAnswer:" + answer_event["best"] - \ No newline at end of file + base_text = ( + base_text + + "\nQuestion:" + + followup_event["best"] + + "\nAnswer:" + + answer_event["best"] + ) diff --git a/prompting/validators/gating.py b/prompting/validators/gating.py index 7f7289e..749e965 100644 --- a/prompting/validators/gating.py +++ b/prompting/validators/gating.py @@ -136,14 +136,18 @@ def __init__( config = GatingModel.config() if model_name is not None: config.gating.model_name = model_name - config.gating.num_uids = num_uids if num_uids is not None else config.gating.num_uids + config.gating.num_uids = ( + num_uids if num_uids is not None else config.gating.num_uids + ) self.config = config self.num_uids = config.gating.num_uids self.device = torch.device(self.config.neuron.device) self.tokenizer = AutoTokenizer.from_pretrained(self.config.gating.model_name) self.tokenizer.pad_token = self.tokenizer.eos_token self.model = AutoModel.from_pretrained(self.config.gating.model_name) - self.linear = torch.nn.Linear(self.model.config.hidden_size, config.gating.num_uids) + self.linear = torch.nn.Linear( + self.model.config.hidden_size, config.gating.num_uids + ) self.optimizer = torch.optim.SGD( [{"params": self.linear.parameters()}], lr=self.config.gating.learning_rate, @@ -160,7 +164,9 @@ def backward(self, scores: torch.FloatTensor, rewards: torch.FloatTensor): """ normalized_scores = torch.nn.functional.softmax(scores, dim=0).to(self.device) normalized_rewards = torch.nn.functional.softmax(rewards, dim=0).to(self.device) - loss = torch.nn.functional.mse_loss(normalized_scores, normalized_rewards.detach()) + loss = torch.nn.functional.mse_loss( + normalized_scores, normalized_rewards.detach() + ) loss.backward() self.optimizer.step() return loss @@ -184,7 +190,7 @@ def forward(self, message: str) -> "torch.FloatTensor": # Pop the overflow mapping from the input to maintain the expected { input_ids, mask } format of the model _ = encoded_input.pop("overflow_to_sample_mapping") - + with torch.no_grad(): hidden_states = self.model(**encoded_input).last_hidden_state[0, -1, :] return self.linear(hidden_states) @@ -227,13 +233,17 @@ def __init__( config = SentenceEmbedGatingModel.config() if model_name is not None: config.gating.model_name = model_name - config.gating.num_uids = num_uids if num_uids is not None else config.gating.num_uids + config.gating.num_uids = ( + num_uids if num_uids is not None else config.gating.num_uids + ) self.config = config self.num_uids = config.gating.num_uids self.device = torch.device(self.config.neuron.device) self.tokenizer = AutoTokenizer.from_pretrained(self.config.gating.model_name) self.transformer = AutoModel.from_pretrained(self.config.gating.model_name) - self.linear = torch.nn.Linear(self.transformer.config.hidden_size, config.gating.num_uids) + self.linear = torch.nn.Linear( + self.transformer.config.hidden_size, config.gating.num_uids + ) self.optimizer = torch.optim.SGD( [{"params": self.linear.parameters()}], lr=self.config.gating.learning_rate, @@ -254,8 +264,12 @@ def mean_pooling(self, model_output, attention_mask): and dividing it by the sum of input_mask_expanded after clamping its values to a minimum of 1e-9. 
""" token_embeddings = model_output[0] - input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() - return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) + input_mask_expanded = ( + attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + ) + return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp( + input_mask_expanded.sum(1), min=1e-9 + ) def forward(self, message: str) -> "torch.FloatTensor": """Runs a forward pass through the model. @@ -280,8 +294,12 @@ def forward(self, message: str) -> "torch.FloatTensor": with torch.no_grad(): embeddings = self.transformer(**encoded_input) - sentence_embeddings = self.mean_pooling(embeddings, encoded_input["attention_mask"]) - sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1) + sentence_embeddings = self.mean_pooling( + embeddings, encoded_input["attention_mask"] + ) + sentence_embeddings = torch.nn.functional.normalize( + sentence_embeddings, p=2, dim=1 + ) batch_representation = torch.mean(sentence_embeddings, dim=0) scores = self.linear(batch_representation) @@ -298,7 +316,9 @@ def backward(self, scores: torch.FloatTensor, rewards: torch.FloatTensor): """ normalized_scores = torch.nn.functional.softmax(scores, dim=0).to(self.device) normalized_rewards = torch.nn.functional.softmax(rewards, dim=0).to(self.device) - loss = torch.nn.functional.mse_loss(normalized_scores, normalized_rewards.detach()) + loss = torch.nn.functional.mse_loss( + normalized_scores, normalized_rewards.detach() + ) loss.backward() self.optimizer.step() return loss diff --git a/prompting/validators/misc.py b/prompting/validators/misc.py index 886e8c5..34f113c 100644 --- a/prompting/validators/misc.py +++ b/prompting/validators/misc.py @@ -21,6 +21,8 @@ from math import floor from typing import Callable, Any from functools import lru_cache, update_wrapper + + # LRU Cache with TTL def ttl_cache(maxsize: int = 128, typed: bool = False, ttl: int = -1): if ttl <= 0: diff --git a/prompting/validators/mock.py b/prompting/validators/mock.py index ee6f8af..94bd6ee 100644 --- a/prompting/validators/mock.py +++ b/prompting/validators/mock.py @@ -56,7 +56,6 @@ def reward( class MockDendriteResponse: - class mock_status: status_code = 200 @@ -86,13 +85,7 @@ def __repr__(self): class MockDendrite(torch.nn.Module): - - async def query( - self, - synapse, - axons, - timeout - ): + async def query(self, synapse, axons, timeout): async def test(): await asyncio.sleep(0.01) return [MockDendriteResponse(synapse.messages[0]) for _ in axons] @@ -103,7 +96,12 @@ def resync(self, metagraph): pass async def async_backward( - self, uids: List[int], roles: List[str], messages: List[str], completions: List[str], rewards: List[float] + self, + uids: List[int], + roles: List[str], + messages: List[str], + completions: List[str], + rewards: List[float], ): async def query(): await asyncio.sleep(0.01) diff --git a/prompting/validators/prompts.py b/prompting/validators/prompts.py index d92fb08..171215c 100644 --- a/prompting/validators/prompts.py +++ b/prompting/validators/prompts.py @@ -21,15 +21,16 @@ class BasePrompt: r"""Base class for prompts expecting an extractable response.""" + def __init__(self): - self.template = '' - self.extract_pattern = '' + self.template = "" + self.extract_pattern = "" def text(self, *args) -> str: r"""Sanitize input strings and format prompt template.""" sanitized = args for tag in find_unique_tags(self.template): - 
sanitized = [arg.replace(tag, '') for arg in sanitized] + sanitized = [arg.replace(tag, "") for arg in sanitized] return self.template.format(*sanitized) @@ -47,14 +48,14 @@ def extract(self, response: str): def matches_template(self, input_text) -> bool: r"""Checks if the input_text matches the first unformatted part of the prompt template.""" - index = self.template.find('{') + index = self.template.find("{") return input_text[:index] == self.template[:index] class ScoringPrompt(BasePrompt): def __init__(self): super().__init__() - self.extract_pattern = r'\b([0-9]|10)\b' + self.extract_pattern = r"\b([0-9]|10)\b" def extract_score(self, response: str) -> float: r"""Extract numeric score (range 0-10) from prompt response.""" @@ -71,11 +72,14 @@ def extract_score(self, response: str) -> float: @staticmethod def mock_response(): r"""Mock responses to a followup prompt, for use in MockDendritePool.""" - return random.choices(["", f"{ random.randint(0, 10) }"], weights=[1, 9])[0] + return random.choices( + ["", f"{ random.randint(0, 10) }"], weights=[1, 9] + )[0] class AugmentPrompt(ScoringPrompt): r"""Scores a summary on a scale from 0 to 10, given a context.""" + def __init__(self): super().__init__() self.template = augment_scoring_template @@ -83,6 +87,7 @@ def __init__(self): class FollowupPrompt(ScoringPrompt): r"""Scores a question on a scale from 0 to 10, given a context.""" + def __init__(self): super().__init__() self.template = followup_scoring_template @@ -90,6 +95,7 @@ def __init__(self): class AnswerPrompt(ScoringPrompt): r"""Scores an answer on a scale from 0 to 10, given a question.""" + def __init__(self): super().__init__() self.template = answer_scoring_template @@ -97,6 +103,7 @@ def __init__(self): class FirewallPrompt(BasePrompt): r"""Detects jailbreaks or prompt injections that influence prompt-based scoring in answers.""" + def __init__(self): super().__init__() self.template = firewall_template @@ -106,25 +113,30 @@ def detected(self, response) -> bool: r"""Extract detection bool from prompt response.""" extraction = self.extract(response) if extraction is not None: - if extraction == 'True': + if extraction == "True": return True return False @staticmethod def mock_response(): r"""Mock responses to a firewall prompt, for use in MockDendritePool.""" - return random.choices(["", "False", "True"], weights=[1, 8, 1])[0] + return random.choices( + ["", "False", "True"], + weights=[1, 8, 1], + )[0] def find_unique_tags(input_text: str): r"""Find all substrings that match the pattern '<...>'.""" - matches = re.findall('<([^>]*)>', input_text) + matches = re.findall("<([^>]*)>", input_text) # Return a list of unique matches. return list(set(matches)) # Request a follow-up question given a preceding context. -followup_request_template = "Ask a single relevant and insightful question about the preceding context" +followup_request_template = ( + "Ask a single relevant and insightful question about the preceding context" +) # Scores a summary on a scale from 0 to 10, given a context. augment_scoring_template = """Score the relevance, succinctness, and quality of a summary given a context. The context is within tags, and the question is within tags. Give a score between 0 and 10 in the tags, where 0 means the summary is irrelevant, and 10 means it's perfectly relevant and a good summary. Include a brief explanation for your score based solely on the context-summary relationship. 
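# A minimal sketch of how ScoringPrompt.extract_score pulls a 0-10 rating out
# of a model reply using the r"\b([0-9]|10)\b" pattern defined above. The
# sample responses are made up for illustration.
import re

def extract_score(response: str) -> float:
    match = re.search(r"\b([0-9]|10)\b", response)
    return float(match.group(0)) if match else 0.0

print(extract_score("<Score>7</Score> the summary is mostly on topic"))  # 7.0
print(extract_score("I cannot rate this"))                               # 0.0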
@@ -346,18 +358,21 @@ def find_unique_tags(input_text: str): """ -def followup_prompt( base_text:str, i:int = 0) -> str: + +def followup_prompt(base_text: str, i: int = 0) -> str: if i == 0: return f"{base_text}\n\n{followup_request_template}\n. Do not try to return an answer or a summary:" else: return f"{base_text}\n\n{followup_request_template} and previous questions. Do not try to return an answer or a summary:\n" -def answer_prompt( base_text:str, followup:str ) -> str: +def answer_prompt(base_text: str, followup: str) -> str: return f"{base_text}\n\nQuestion:{followup}\nAnswer the question step by step and explain your thoughts. Do not include questions or summaries in your answer." + augment_request_template = "Summarize the preceding context" -def augment_prompt( base_text:str ) -> str: + +def augment_prompt(base_text: str) -> str: random_level = random.randint(4, 8) return f"{base_text}\n\n{augment_request_template} in {random_level} sentences. Do not try to create questions or answers for your summarization.\n\n" diff --git a/prompting/validators/reward/__init__.py b/prompting/validators/reward/__init__.py index 51a20f9..d26773f 100644 --- a/prompting/validators/reward/__init__.py +++ b/prompting/validators/reward/__init__.py @@ -10,4 +10,4 @@ from .dahoas import DahoasRewardModel from .diversity import DiversityRewardModel from .prompt import PromptRewardModel -from .config import RewardModelType, DefaultRewardFrameworkConfig \ No newline at end of file +from .config import RewardModelType, DefaultRewardFrameworkConfig diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index b7027a4..48ba847 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -1,4 +1,3 @@ - # The MIT License (MIT) # Copyright © 2021 Yuma Rao @@ -23,34 +22,40 @@ blacklist = ["That is an excellent question."] -class Blacklist( BaseRewardModel ): +class Blacklist(BaseRewardModel): @property - def name(self) -> str: return RewardModelType.blacklist.value + def name(self) -> str: + return RewardModelType.blacklist.value def __init__(self): super().__init__() self.question_blacklist = [] self.answer_blacklist = [] - def reward( self, prompt: str, completion: str, name: str ) -> float: - if completion in blacklist: + def reward(self, prompt: str, completion: str, name: str) -> float: + if completion in blacklist: return 0.0 - + if completion == prompt: return 0.0 - + if completion in self.question_blacklist or completion in self.answer_blacklist: - return 0.0 - + return 0.0 + return 1 - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32) + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + return torch.tensor( + [self.reward(prompt, completion, name) for completion in completions], + dtype=torch.float32, + ) - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards def reset(self): self.question_blacklist = [] - self.answer_blacklist = [] \ No newline at end of file + self.answer_blacklist = [] diff --git a/prompting/validators/reward/config.py b/prompting/validators/reward/config.py index f9bd5fc..9486bbc 100644 --- a/prompting/validators/reward/config.py +++ b/prompting/validators/reward/config.py @@ 
-18,16 +18,16 @@ class RewardModelType(Enum): - dpo = 'dpo_reward_model' - rlhf = 'rlhf_reward_model' - reciprocate = 'reciprocate_reward_model' - dahoas = 'dahoas_reward_model' - diversity = 'diversity_reward_model' - prompt = 'prompt_reward_model' - blacklist = 'blacklist_filter' - nsfw = 'nsfw_filter' - relevance = 'relevance_filter' - task_validator = 'task_validator_filter' + dpo = "dpo_reward_model" + rlhf = "rlhf_reward_model" + reciprocate = "reciprocate_reward_model" + dahoas = "dahoas_reward_model" + diversity = "diversity_reward_model" + prompt = "prompt_reward_model" + blacklist = "blacklist_filter" + nsfw = "nsfw_filter" + relevance = "relevance_filter" + task_validator = "task_validator_filter" @dataclass(frozen=True) @@ -35,6 +35,7 @@ class DefaultRewardFrameworkConfig: """Reward framework default configuration. Note: All the weights should add up to 1.0. """ + dpo_model_weight: float = 0 rlhf_model_weight: float = 1 reciprocate_model_weight: float = 0 diff --git a/prompting/validators/reward/dahoas.py b/prompting/validators/reward/dahoas.py index 71c8086..a163929 100644 --- a/prompting/validators/reward/dahoas.py +++ b/prompting/validators/reward/dahoas.py @@ -22,44 +22,47 @@ from .reward import BaseRewardModel from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig -class DahoasRewardModel( BaseRewardModel ): +class DahoasRewardModel(BaseRewardModel): model_name = "EleutherAI/gpt-j-6b" @property - def name(self) -> str: return RewardModelType.dahoas.value + def name(self) -> str: + return RewardModelType.dahoas.value @staticmethod - def load_weights( path: str ): - if not os.path.exists( path + "/hf_ckpt.pt"): - os.makedirs( path, exist_ok=True) + def load_weights(path: str): + if not os.path.exists(path + "/hf_ckpt.pt"): + os.makedirs(path, exist_ok=True) os.system( f"wget -O { path + '/hf_ckpt.pt'} \ https://huggingface.co/Dahoas/gptj-rm-static/resolve/main/hf_ckpt.pt" ) - def __init__(self, path: str, device: str ): + def __init__(self, path: str, device: str): super().__init__() - DahoasRewardModel.load_weights( path = path ) + DahoasRewardModel.load_weights(path=path) self.device = torch.device(device) - config = AutoConfig.from_pretrained( DahoasRewardModel.model_name ) - self.model = AutoModelForCausalLM.from_config( config ).to(self.device) + config = AutoConfig.from_pretrained(DahoasRewardModel.model_name) + self.model = AutoModelForCausalLM.from_config(config).to(self.device) self.config = self.model.config # `gpt-neo(x)` models use `hidden_size` attribute names instead of `n_embd`` if config is None: config = DahoasRewardModel.config() - self.config.n_embd = self.config.hidden_size if hasattr(self.config, "hidden_size") else self.config.n_embd + self.config.n_embd = ( + self.config.hidden_size + if hasattr(self.config, "hidden_size") + else self.config.n_embd + ) self.transformer = self.model.transformer self.v_head = torch.nn.Linear(self.config.n_embd, 1, bias=False).to(self.device) self.tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b") self.tokenizer.pad_token = self.tokenizer.eos_token self.PAD_ID = self.tokenizer(self.tokenizer.pad_token)["input_ids"][0] - - def reward( self, prompt: str, completion: str, name: str ) -> float: - + def reward(self, prompt: str, completion: str, name: str) -> float: def reward_fn(samples): if samples is None: return 0 @@ -67,7 +70,10 @@ def reward_fn(samples): batch_size = 1 for i in range(0, len(samples), batch_size): sub_samples = samples[i : i + batch_size] - sub_samples = ["<|startoftext|>" 
+ chosen + "<|endoftext|>" for chosen in sub_samples] + sub_samples = [ + "<|startoftext|>" + chosen + "<|endoftext|>" + for chosen in sub_samples + ] encodings_dict = self.tokenizer( sub_samples, truncation=False, @@ -81,20 +87,25 @@ def reward_fn(samples): attn_masks = attn_masks.repeat(2, 1) with torch.no_grad(): sub_scores = self.forward( - input_ids = input_ids.to(self.device), - attention_mask = attn_masks.to(self.device), + input_ids=input_ids.to(self.device), + attention_mask=attn_masks.to(self.device), ) scores_list.append(sub_scores["chosen_end_scores"]) scores = torch.cat(scores_list, dim=0).mean().item() return scores with torch.no_grad(): - combined_reward = reward_fn( prompt + completion ) - independent_reward = reward_fn( completion ) - return float( (combined_reward - independent_reward).item() ) - - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) + combined_reward = reward_fn(prompt + completion) + independent_reward = reward_fn(completion) + return float((combined_reward - independent_reward).item()) + + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + return torch.tensor( + [self.reward(prompt, completion, name) for completion in completions], + dtype=torch.float32, + ).to(self.device) def forward( self, @@ -114,7 +125,7 @@ def forward( loss = None transformer_outputs = self.transformer( input_ids.to(self.device), - attention_mask = attention_mask.to(self.device), + attention_mask=attention_mask.to(self.device), ) hidden_states = transformer_outputs[0] @@ -161,7 +172,9 @@ def forward( rejected_end_scores.append(r_truncated_reward[-1]) # Compute loss based on truncated rewards (ignore padding) - loss += -torch.log(torch.sigmoid(c_truncated_reward - r_truncated_reward)).mean() + loss += -torch.log( + torch.sigmoid(c_truncated_reward - r_truncated_reward) + ).mean() loss = loss / bs if not inference: @@ -176,4 +189,4 @@ def forward( "loss": loss, "chosen_end_scores": chosen_end_scores, "rejected_end_scores": rejected_end_scores, - } \ No newline at end of file + } diff --git a/prompting/validators/reward/diversity.py b/prompting/validators/reward/diversity.py index 242f302..d4c8799 100644 --- a/prompting/validators/reward/diversity.py +++ b/prompting/validators/reward/diversity.py @@ -20,11 +20,12 @@ from typing import List from .config import RewardModelType from .reward import BaseRewardModel -from transformers import AutoTokenizer, AutoModel +from transformers import AutoTokenizer, AutoModel from torchmetrics.functional import pairwise_cosine_similarity -def mean_pooling( model_output, attention_mask ): + +def mean_pooling(model_output, attention_mask): """Applies mean pooling to the token embeddings generated by the model. Args: model_output (torch.Tensor): Embedding model output, where the first element contains token embeddings. @@ -38,29 +39,36 @@ def mean_pooling( model_output, attention_mask ): and dividing it by the sum of input_mask_expanded after clamping its values to a minimum of 1e-9. 
""" token_embeddings = model_output[0] - input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + input_mask_expanded = ( + attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + ) return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp( input_mask_expanded.sum(1), min=1e-9 ) - -class DiversityRewardModel( BaseRewardModel ): - + + +class DiversityRewardModel(BaseRewardModel): diversity_model_path = "sentence-transformers/all-mpnet-base-v2" - + @property - def name(self) -> str: return RewardModelType.diversity.value + def name(self) -> str: + return RewardModelType.diversity.value - def __init__( self, device: str ): + def __init__(self, device: str): super().__init__() self.device = device - self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path ) - self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained( + DiversityRewardModel.diversity_model_path + ) + self.model = AutoModel.from_pretrained( + DiversityRewardModel.diversity_model_path + ).to(self.device) self.reward_bottom_k = 2 self.history_reward_bottom_k = 2 self.historic_embeddings = torch.tensor([]).to(self.device) self.history_range = (500, 15500) - - def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": + + def get_embeddings(self, sentences: List[str]) -> "torch.FloatTensor": """Runs a forward pass through the model. Args: sentences (:obj:`List[str]`): @@ -84,12 +92,12 @@ def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": # Pooling sentence_embeddings = mean_pooling(embeddings, encoded_input["attention_mask"]) - + # Normalizing sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) return sentence_embeddings - def update_historic_embeddings( self, embeddings: torch.FloatTensor ): + def update_historic_embeddings(self, embeddings: torch.FloatTensor): def unique(embeddings): unique_embeddings = [embeddings[0]] last_emb = embeddings[0] @@ -98,48 +106,58 @@ def unique(embeddings): unique_embeddings.append(emb) last_emb = emb return torch.stack(unique_embeddings) - + embeddings_unique = unique(embeddings) historic_embeddings = torch.cat([self.historic_embeddings, embeddings_unique]) - self.historic_embeddings = historic_embeddings[-self.history_range[1]:, :] - - def get_historic_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: - def regularise( rewards ): + self.historic_embeddings = historic_embeddings[-self.history_range[1] :, :] + + def get_historic_rewards(self, embeddings: torch.FloatTensor) -> torch.FloatTensor: + def regularise(rewards): # sigmoid function that cutoff at 0.05 approximately - return 1/(1 + torch.exp(-1000 * rewards + 50)) + return 1 / (1 + torch.exp(-1000 * rewards + 50)) - # Return None if history size is too small - if self.historic_embeddings.shape[0] < (self.history_range[0] + self.history_reward_bottom_k): + # Return None if history size is too small + if self.historic_embeddings.shape[0] < ( + self.history_range[0] + self.history_reward_bottom_k + ): return None - + # Calculate the pairwise cosine similarity. - similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] ) + similarity = pairwise_cosine_similarity( + embeddings, self.historic_embeddings[self.history_range[0] :] + ) # Reward to be at the bottom_k smallest of the 1 - similarity score. 
- rewards = torch.topk((1 - torch.abs(similarity)), self.history_reward_bottom_k, largest = False)[0][:, -1] + rewards = torch.topk( + (1 - torch.abs(similarity)), self.history_reward_bottom_k, largest=False + )[0][:, -1] - return regularise(rewards) + return regularise(rewards) - def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: - def regularise( rewards ): + def get_batch_rewards(self, embeddings: torch.FloatTensor) -> torch.FloatTensor: + def regularise(rewards): # sigmoid function that maps 0.07 -> 0.23; 0.1 -> 0.5; 0.2 -> 0.98 - return 1/(1 + torch.exp(-40 * rewards + 4)) + return 1 / (1 + torch.exp(-40 * rewards + 4)) # Calculate the pairwise cosine similarity. - similarity = pairwise_cosine_similarity( embeddings, embeddings ) + similarity = pairwise_cosine_similarity(embeddings, embeddings) # Reward to be at the 10% quantile of the 1 - similarity score. - rewards = torch.topk((1 - torch.abs(similarity)), self.reward_bottom_k, largest = False)[0][:, -1] + rewards = torch.topk( + (1 - torch.abs(similarity)), self.reward_bottom_k, largest=False + )[0][:, -1] + + return regularise(rewards) - return regularise(rewards) - - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: # Check if completions are empty, return 0 if so if len(completions) == 0: return torch.tensor([]).to(self.device) - + # Get embeddings for all completions. - embeddings = self.get_embeddings( completions ) + embeddings = self.get_embeddings(completions) # Get batch rewards. batch_rewards = self.get_batch_rewards(embeddings) @@ -148,12 +166,12 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch historic_rewards = self.get_historic_rewards(embeddings) self.update_historic_embeddings(embeddings) - + # Return all if historic_rewards != None: return batch_rewards * historic_rewards else: return batch_rewards - - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - return rewards \ No newline at end of file + + def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: + return rewards diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 6cc7d59..cfa8907 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -24,94 +24,124 @@ class DirectPreferenceRewardModel(BaseRewardModel): - reward_model_name: str = "cerebras/btlm-3b-8k-base" @property - def name(self) -> str: return RewardModelType.dpo.value + def name(self) -> str: + return RewardModelType.dpo.value def __init__(self, device: str): super().__init__() self.device = device - self.penalty = 1.2 # Same penalty as the original [paper](https://arxiv.org/pdf/1909.05858.pdf). - self.tokenizer = AutoTokenizer.from_pretrained(DirectPreferenceRewardModel.reward_model_name) - self.model = AutoModelForCausalLM.from_pretrained(DirectPreferenceRewardModel.reward_model_name, - trust_remote_code=True, - torch_dtype=torch.float16).to(self.device) - - def reward_single(self, prompt: str, completion: str, name: str ,with_penalty=True) -> float: - r""" Calculates a direct preference optimization (DPO) style reward for a completion, + self.penalty = 1.2 # Same penalty as the original [paper](https://arxiv.org/pdf/1909.05858.pdf). 
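# A minimal sketch of the CTRL-style repetition penalty (penalty = 1.2) that
# logit_penalty applies while scoring completions: logits of tokens already
# generated are discounted by penalty**count, dividing positive scores and
# multiplying negative ones. The vocabulary size and token ids are made up.
import torch

def apply_repetition_penalty(input_ids: torch.LongTensor,
                             logit: torch.FloatTensor,
                             penalty: float = 1.2) -> torch.FloatTensor:
    uniques, counts = input_ids.unique(return_counts=True)
    score = torch.gather(logit, 1, uniques.unsqueeze(0))
    score = torch.where(score < 0,
                        score * (penalty ** counts),
                        score / (penalty ** counts))
    logit.scatter_(1, uniques.unsqueeze(0), score.to(logit.dtype))
    return logit

logit = torch.randn(1, 50)                     # assumed vocabulary of 50 tokens
seen = torch.tensor([3, 3, 7])                 # token 3 repeated twice, token 7 once
print(apply_repetition_penalty(seen, logit)[0, [3, 7]])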
+ self.tokenizer = AutoTokenizer.from_pretrained( + DirectPreferenceRewardModel.reward_model_name + ) + self.model = AutoModelForCausalLM.from_pretrained( + DirectPreferenceRewardModel.reward_model_name, + trust_remote_code=True, + torch_dtype=torch.float16, + ).to(self.device) + + def reward_single( + self, prompt: str, completion: str, name: str, with_penalty=True + ) -> float: + r"""Calculates a direct preference optimization (DPO) style reward for a completion, which is a reference model's average log-probability for completion tokens given a prompt. Uses guidance from https://github.com/eric-mitchell/direct-preference-optimization/blob/main/trainers.py. """ with torch.no_grad(): + # Check if completion is + if completion.strip() == "" or len(completion) <= 5: + return -11 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) - # Check if completion is - if completion.strip() == '' or len(completion) <= 5: - return -11 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) - # Tokenize the combined prompt + completion. - combined = self.tokenizer(prompt + completion, return_tensors="pt").input_ids[0].to(self.device) # [seq_len] + combined = ( + self.tokenizer(prompt + completion, return_tensors="pt") + .input_ids[0] + .to(self.device) + ) # [seq_len] # Tokenize only the prompt, to help determine prompt token length. - prompt_part = self.tokenizer(prompt, return_tensors="pt").input_ids[0].to(self.device) # [prompt_len] + prompt_part = ( + self.tokenizer(prompt, return_tensors="pt").input_ids[0].to(self.device) + ) # [prompt_len] # Completion doesn't fit into model sequence, so return lowest reward. if self.tokenizer.model_max_length <= len(prompt_part): - return -11. # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + return -11.0 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) # Truncate combined to fit into model max sequence length. if self.tokenizer.model_max_length < len(combined): - combined = combined[:self.tokenizer.model_max_length] + combined = combined[: self.tokenizer.model_max_length] labels = combined.clone() # [seq_len] # Ignore prompt part for calculating reward. - labels[:len(prompt_part)] = -100 + labels[: len(prompt_part)] = -100 # Label only each next token prediction ground-truth. labels = labels[1:] # [seq_len-1] - loss_mask = (labels != -100) # [seq_len-1] + loss_mask = labels != -100 # [seq_len-1] # Dummy token to allow for indexing, but loss will be ignored. labels[labels == -100] = 0 # Reshape for gather operation. labels = labels.unsqueeze(0).unsqueeze(2) # [batch_size=1, seq_len-1, :] # Forward pass to calculate logit predictions for each sequence position. - logits = self.model(combined.unsqueeze(0)).logits # [batch_size=1, seq_len, vocab_len] + logits = self.model( + combined.unsqueeze(0) + ).logits # [batch_size=1, seq_len, vocab_len] # Predict only where labels are available. logits = logits[:, :-1, :] # [batch_size=1, seq_len-1, vocab_len] if with_penalty: # Apply penalty for repeated generation - for i in range(len(prompt_part)+1, len(combined)-1): - logit = logits[:,i,:].clone() - inputs = combined[len(prompt_part):i].clone() - logits[:,i,:] = self.logit_penalty(input_ids=inputs, logit=logit) + for i in range(len(prompt_part) + 1, len(combined) - 1): + logit = logits[:, i, :].clone() + inputs = combined[len(prompt_part) : i].clone() + logits[:, i, :] = self.logit_penalty(input_ids=inputs, logit=logit) # Rescale via log(softmax(logits)). 
logits = logits.log_softmax(-1) # Calculate the model's log-probability for each actual completion token. - per_token_logps = torch.gather(logits, dim=2, index=labels).squeeze(2) # [batch_size=1, seq_len-1] + per_token_logps = torch.gather(logits, dim=2, index=labels).squeeze( + 2 + ) # [batch_size=1, seq_len-1] # Average log-probability over completion sequence. - reward = (per_token_logps * loss_mask).sum(-1) / loss_mask.sum(-1) # [batch_size=1] + reward = (per_token_logps * loss_mask).sum(-1) / loss_mask.sum( + -1 + ) # [batch_size=1] reward = reward[0].cpu().detach() # NaNs can possibly arise through log(0)=-inf, replace with suitably small logits. if torch.isnan(reward) or torch.isinf(reward): - return -11. # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + return -11.0 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) return reward.item() - - def get_rewards(self, prompt: str, completions: List[str], name: str) -> torch.FloatTensor: - rewards = torch.tensor([self.reward_single(prompt, completion, name) for completion in completions], - dtype=torch.float32).to(self.device) + + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + rewards = torch.tensor( + [ + self.reward_single(prompt, completion, name) + for completion in completions + ], + dtype=torch.float32, + ).to(self.device) bt.logging.trace(f"DirectPreferenceRewardModel | rewards: {rewards.tolist()}") return rewards - def logit_penalty(self, input_ids: torch.LongTensor, logit: torch.FloatTensor) -> torch.FloatTensor: + def logit_penalty( + self, input_ids: torch.LongTensor, logit: torch.FloatTensor + ) -> torch.FloatTensor: # Counts the unique tokens within each generation uniques, counts = input_ids.unique(return_counts=True) score = torch.gather(logit, 1, uniques.unsqueeze(0)) # if score < 0 then repetition penalty has to be multiplied to reduce the previous token probability - score = torch.where(score < 0, score * (self.penalty**counts), score / (self.penalty**counts)) + score = torch.where( + score < 0, + score * (self.penalty**counts), + score / (self.penalty**counts), + ) logit.scatter_(1, uniques.unsqueeze(0), score.to(logit.dtype)) - return logit \ No newline at end of file + return logit diff --git a/prompting/validators/reward/nsfw.py b/prompting/validators/reward/nsfw.py index dfe271d..b608ed7 100644 --- a/prompting/validators/reward/nsfw.py +++ b/prompting/validators/reward/nsfw.py @@ -1,5 +1,3 @@ - - # The MIT License (MIT) # Copyright © 2021 Yuma Rao @@ -23,45 +21,56 @@ from .reward import BaseRewardModel from transformers import AutoModelForSequenceClassification, AutoTokenizer -class NSFWRewardModel( BaseRewardModel ): +class NSFWRewardModel(BaseRewardModel): nsfw_filter_model_path = "facebook/roberta-hate-speech-dynabench-r4-target" @property - def name(self) -> str: return RewardModelType.nsfw.value - - def __init__( self, device: str ): + def name(self) -> str: + return RewardModelType.nsfw.value + + def __init__(self, device: str): super().__init__() self.device = device - self.tokenizer = AutoTokenizer.from_pretrained( NSFWRewardModel.nsfw_filter_model_path ) - self.model = AutoModelForSequenceClassification.from_pretrained( NSFWRewardModel.nsfw_filter_model_path ).to(self.device) - - def reward( self, prompt: str, completion: str, name: str ) -> float: + self.tokenizer = AutoTokenizer.from_pretrained( + NSFWRewardModel.nsfw_filter_model_path + ) + self.model = AutoModelForSequenceClassification.from_pretrained( + 
NSFWRewardModel.nsfw_filter_model_path + ).to(self.device) + def reward(self, prompt: str, completion: str, name: str) -> float: boundary = -0.5 with torch.no_grad(): message = completion input_ids = self.tokenizer(message)["input_ids"] # Returns the nsfw hate score for the chunk. - def hate_score( chunk ) -> float: - nothate, hate = self.model(torch.tensor( [chunk] ).to(self.device)).logits[0].tolist() + def hate_score(chunk) -> float: + nothate, hate = ( + self.model(torch.tensor([chunk]).to(self.device)).logits[0].tolist() + ) return nothate, hate # Returns the max hate score from each chunk of size chunk_size - def sum_nsfw_scores(input_ids, chunk_size ): + def sum_nsfw_scores(input_ids, chunk_size): max_score = -1000 for i in range(0, len(input_ids), chunk_size): - chunk_i = input_ids[i:i + chunk_size] - not_hate_score_i, hate_score_i = hate_score( chunk_i ) - max_score = max( max( -not_hate_score_i, hate_score_i), max_score) + chunk_i = input_ids[i : i + chunk_size] + not_hate_score_i, hate_score_i = hate_score(chunk_i) + max_score = max(max(-not_hate_score_i, hate_score_i), max_score) return max_score - + # 0 when needs to be filtered out, 1 when it is safe - return 0.0 if sum_nsfw_scores( input_ids, chunk_size = 512 ) > boundary else 1.0 - - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) + return 0.0 if sum_nsfw_scores(input_ids, chunk_size=512) > boundary else 1.0 + + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + return torch.tensor( + [self.reward(prompt, completion, name) for completion in completions], + dtype=torch.float32, + ).to(self.device) - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - return rewards \ No newline at end of file + def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: + return rewards diff --git a/prompting/validators/reward/open_assistant.py b/prompting/validators/reward/open_assistant.py index a6ff19d..8eaed9f 100644 --- a/prompting/validators/reward/open_assistant.py +++ b/prompting/validators/reward/open_assistant.py @@ -21,23 +21,38 @@ from .reward import BaseRewardModel from transformers import AutoTokenizer, AutoModelForSequenceClassification -class OpenAssistantRewardModel( BaseRewardModel ): +class OpenAssistantRewardModel(BaseRewardModel): reward_model_name: str = "OpenAssistant/reward-model-deberta-v3-large-v2" @property - def name(self) -> str: return RewardModelType.rlhf.value + def name(self) -> str: + return RewardModelType.rlhf.value - def __init__( self , device: str ): + def __init__(self, device: str): super().__init__() self.device = device - self.tokenizer = AutoTokenizer.from_pretrained( OpenAssistantRewardModel.reward_model_name ) - self.model = AutoModelForSequenceClassification.from_pretrained( OpenAssistantRewardModel.reward_model_name ) .to(self.device) - - def reward_single( self, prompt: str, completion: str, name: str ) -> float: + self.tokenizer = AutoTokenizer.from_pretrained( + OpenAssistantRewardModel.reward_model_name + ) + self.model = AutoModelForSequenceClassification.from_pretrained( + OpenAssistantRewardModel.reward_model_name + ).to(self.device) + + def reward_single(self, prompt: str, completion: str, name: str) -> float: with torch.no_grad(): - inputs = self.tokenizer(prompt, completion, return_tensors='pt').to(self.device) - 
return float( self.model( **inputs ).logits[0].cpu().detach() ) - - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [self.reward_single( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) + inputs = self.tokenizer(prompt, completion, return_tensors="pt").to( + self.device + ) + return float(self.model(**inputs).logits[0].cpu().detach()) + + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + return torch.tensor( + [ + self.reward_single(prompt, completion, name) + for completion in completions + ], + dtype=torch.float32, + ).to(self.device) diff --git a/prompting/validators/reward/prompt.py b/prompting/validators/reward/prompt.py index 7356681..3e64cc2 100644 --- a/prompting/validators/reward/prompt.py +++ b/prompting/validators/reward/prompt.py @@ -29,30 +29,34 @@ class PromptRewardModel(BaseRewardModel): reward_model_name: str = "VMware/open-llama-7b-open-instruct" @property - def name(self) -> str: return RewardModelType.prompt.value + def name(self) -> str: + return RewardModelType.prompt.value - def __init__(self, device: str ): + def __init__(self, device: str): super().__init__() self.device = device # https://huggingface.co/VMware/open-llama-7b-open-instruct # Fast tokenizer results in incorrect encoding, set the use_fast = False parameter. - self.tokenizer = AutoTokenizer.from_pretrained(PromptRewardModel.reward_model_name, use_fast=False) + self.tokenizer = AutoTokenizer.from_pretrained( + PromptRewardModel.reward_model_name, use_fast=False + ) # Generative default expects most recent token on right-hand side with padding on left. # https://github.com/huggingface/transformers/pull/10552 self.tokenizer.padding_side = "left" - self.model = AutoModelForCausalLM.from_pretrained(PromptRewardModel.reward_model_name, - torch_dtype=torch.float16).to(self.device) + self.model = AutoModelForCausalLM.from_pretrained( + PromptRewardModel.reward_model_name, torch_dtype=torch.float16 + ).to(self.device) def reward(self, prompt: str, completion: str, name: str) -> float: with torch.no_grad(): # Choose correct scoring prompt for request type. - if name == 'augment': + if name == "augment": scoring_prompt = AugmentPrompt() - elif name == 'followup': + elif name == "followup": scoring_prompt = FollowupPrompt() - elif name == 'answer': + elif name == "answer": scoring_prompt = AnswerPrompt() else: return 0 @@ -72,23 +76,37 @@ def reward(self, prompt: str, completion: str, name: str) -> float: # Prompt local reward model. start_time = time.time() - generated_tokens = self.model.generate(input_ids, max_new_tokens=2, max_time=1) + generated_tokens = self.model.generate( + input_ids, max_new_tokens=2, max_time=1 + ) duration = time.time() - start_time - generated_text = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True) + generated_text = self.tokenizer.batch_decode( + generated_tokens, skip_special_tokens=True + ) # Extract score from generated text. - score_text = generated_text[0][len(scoring_prompt_text):] + score_text = generated_text[0][len(scoring_prompt_text) :] score = scoring_prompt.extract_score(score_text) - bt.logging.trace(f"PromptRewardModel | {name} score: {score} | {repr(score_text)} | " - f"{duration:.2f}s | {repr(completion[:70])}") + bt.logging.trace( + f"PromptRewardModel | {name} score: {score} | {repr(score_text)} | " + f"{duration:.2f}s | {repr(completion[:70])}" + ) # Scale 0-10 score to 0-1 range. 
- score /= 10. + score /= 10.0 return score - - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - bt.logging.debug(f"PromptRewardModel | Calculating {len(completions)} rewards (typically < 1 sec/reward).") - bt.logging.trace(f"PromptRewardModel | prompt: {repr(prompt[:50])} ... {repr(prompt[-50:])}") - return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) - + + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + bt.logging.debug( + f"PromptRewardModel | Calculating {len(completions)} rewards (typically < 1 sec/reward)." + ) + bt.logging.trace( + f"PromptRewardModel | prompt: {repr(prompt[:50])} ... {repr(prompt[-50:])}" + ) + return torch.tensor( + [self.reward(prompt, completion, name) for completion in completions], + dtype=torch.float32, + ).to(self.device) diff --git a/prompting/validators/reward/reciprocate.py b/prompting/validators/reward/reciprocate.py index 2dc5187..ba1975c 100644 --- a/prompting/validators/reward/reciprocate.py +++ b/prompting/validators/reward/reciprocate.py @@ -1,4 +1,3 @@ - # The MIT License (MIT) # Copyright © 2021 Yuma Rao @@ -22,31 +21,44 @@ from .reward import BaseRewardModel from transformers import AutoTokenizer, AutoModelForSequenceClassification -class ReciprocateRewardModel( BaseRewardModel ): +class ReciprocateRewardModel(BaseRewardModel): reward_model_path: str = "reciprocate/gpt-j_rm_format-oa" revision: str = "501f895" @property - def name(self) -> str: return RewardModelType.reciprocate.value + def name(self) -> str: + return RewardModelType.reciprocate.value - def __init__( self, device: str ): + def __init__(self, device: str): super().__init__() self.device = device - self.tokenizer = AutoTokenizer.from_pretrained( ReciprocateRewardModel.reward_model_path, revision = ReciprocateRewardModel.revision ) - self.model = AutoModelForSequenceClassification.from_pretrained( ReciprocateRewardModel.reward_model_path, - revision = ReciprocateRewardModel.revision, - torch_dtype=torch.float16).to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained( + ReciprocateRewardModel.reward_model_path, + revision=ReciprocateRewardModel.revision, + ) + self.model = AutoModelForSequenceClassification.from_pretrained( + ReciprocateRewardModel.reward_model_path, + revision=ReciprocateRewardModel.revision, + torch_dtype=torch.float16, + ).to(self.device) - def reward( self, prompt: str, completion: str, name: str ) -> float: + def reward(self, prompt: str, completion: str, name: str) -> float: with torch.no_grad(): - message = f"<|prompter|>{prompt}<|assistant|>{completion}<|endoftext|>" - inputs = self.tokenizer( message, - return_tensors="pt" , - truncation=True, - ).to(self.device) - return float( self.model( **inputs )[0].item() ) - - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) - + message = ( + f"<|prompter|>{prompt}<|assistant|>{completion}<|endoftext|>" + ) + inputs = self.tokenizer( + message, + return_tensors="pt", + truncation=True, + ).to(self.device) + return float(self.model(**inputs)[0].item()) + + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + return torch.tensor( + [self.reward(prompt, completion, name) for completion in completions], + 
dtype=torch.float32, + ).to(self.device) diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index c8789e1..86b8dcc 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -1,4 +1,3 @@ - # The MIT License (MIT) # Copyright © 2021 Yuma Rao @@ -20,7 +19,7 @@ from typing import List from .config import RewardModelType from .reward import BaseRewardModel -from transformers import AutoTokenizer, AutoModel +from transformers import AutoTokenizer, AutoModel from torchmetrics.functional import pairwise_cosine_similarity import torch.nn.functional as F @@ -46,31 +45,36 @@ def mean_pooling(model_output, attention_mask): input_mask_expanded.sum(1), min=1e-9 ) -class RelevanceRewardModel( BaseRewardModel ): +class RelevanceRewardModel(BaseRewardModel): @property - def name(self) -> str: return RewardModelType.relevance.value - - def __init__( self, device: str ): + def name(self) -> str: + return RewardModelType.relevance.value + + def __init__(self, device: str): super().__init__() self.device = device self.models = [ BertRelevanceRewardModel(self.device), - MpnetRelevenceModel(self.device) + MpnetRelevenceModel(self.device), ] self.bounds = [-0.0246, 0.3] - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) - - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + return torch.tensor( + [self.reward(prompt, completion, name) for completion in completions], + dtype=torch.float32, + ).to(self.device) + + def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards - + def reward(self, prompt: str, completion: str, name: str) -> float: for i, model in enumerate(self.models): - # rewards - diff = model.reward(prompt,completion) + diff = model.reward(prompt, completion) # If a model returns 0, stop iterating and return 0 if diff < self.bounds[i]: @@ -78,15 +82,19 @@ def reward(self, prompt: str, completion: str, name: str) -> float: # If none of the models returned 0, return 1 return 1.0 -class BertRelevanceRewardModel( BaseRewardModel ): +class BertRelevanceRewardModel(BaseRewardModel): relevance_model_path = "bert-base-uncased" - - def __init__( self, device: str ): + + def __init__(self, device: str): super().__init__() self.device = device - self.tokenizer = AutoTokenizer.from_pretrained(BertRelevanceRewardModel.relevance_model_path) - self.model = AutoModel.from_pretrained(BertRelevanceRewardModel.relevance_model_path).to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained( + BertRelevanceRewardModel.relevance_model_path + ) + self.model = AutoModel.from_pretrained( + BertRelevanceRewardModel.relevance_model_path + ).to(self.device) def get_embedding(self, message: str) -> "torch.FloatTensor": """Runs a forward pass through the model. 
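# A minimal sketch of the relevance gate above: each sub-model scores the
# (prompt, completion) pair, and the completion is rejected (reward 0.0) as
# soon as one score falls below that model's bound, otherwise it passes with
# reward 1.0. The stand-in scorers and example strings are assumptions.
from typing import Callable, List, Tuple

def relevance_gate(prompt: str,
                   completion: str,
                   scorers: List[Tuple[Callable[[str, str], float], float]]) -> float:
    for score_fn, bound in scorers:
        if score_fn(prompt, completion) < bound:
            return 0.0          # filtered out by this relevance check
    return 1.0                  # passed every check

# Stand-ins mimicking the (negative RMSE, cosine similarity) pair used above.
scorers = [(lambda p, c: -0.01, -0.0246), (lambda p, c: 0.45, 0.3)]
print(relevance_gate("What is a subnet?", "A subnet is a Bittensor network.", scorers))  # 1.0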
@@ -112,33 +120,39 @@ def get_embedding(self, message: str) -> "torch.FloatTensor": embeddings = self.model(**encoded_input) sentence_embeddings = mean_pooling(embeddings, encoded_input["attention_mask"]) - sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1) + sentence_embeddings = torch.nn.functional.normalize( + sentence_embeddings, p=2, dim=1 + ) batch_representation = torch.mean(sentence_embeddings, dim=0) return batch_representation - - def reward( self, prompt: str, completion:str ) -> float: + + def reward(self, prompt: str, completion: str) -> float: # Get the two bert embeddings. - completion_embedding = self.get_embedding( completion) - prompt_embedding = self.get_embedding( prompt) + completion_embedding = self.get_embedding(completion) + prompt_embedding = self.get_embedding(prompt) # Calculate the RMSE distance for the 2 embeddings. - diff = (( completion_embedding - prompt_embedding )**2).mean()**0.5 + diff = ((completion_embedding - prompt_embedding) ** 2).mean() ** 0.5 # Return relevance scoring. return float(-diff) -class MpnetRelevenceModel( BaseRewardModel ): - + +class MpnetRelevenceModel(BaseRewardModel): diversity_model_path = "sentence-transformers/all-mpnet-base-v2" - def __init__( self, device: str ): + def __init__(self, device: str): super().__init__() self.device = device - self.tokenizer = AutoTokenizer.from_pretrained( MpnetRelevenceModel.diversity_model_path ) - self.model = AutoModel.from_pretrained( MpnetRelevenceModel.diversity_model_path ).to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained( + MpnetRelevenceModel.diversity_model_path + ) + self.model = AutoModel.from_pretrained( + MpnetRelevenceModel.diversity_model_path + ).to(self.device) self.reward_quantile = torch.tensor(0.1).to(self.device) - - def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": + + def get_embeddings(self, sentences: List[str]) -> "torch.FloatTensor": """Runs a forward pass through the model. Args: sentences (:obj:`List[str]`): @@ -162,18 +176,17 @@ def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": # Pooling sentence_embeddings = mean_pooling(embeddings, encoded_input["attention_mask"]) - + # Normalizing sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) return sentence_embeddings - def reward( self, prompt: str, completion: str ) -> torch.FloatTensor: - + def reward(self, prompt: str, completion: str) -> torch.FloatTensor: # Get embeddings for all completions. - embeddings = self.get_embeddings( completion ) - prompt_embed = self.get_embeddings( prompt ) + embeddings = self.get_embeddings(completion) + prompt_embed = self.get_embeddings(prompt) # Calculate the pairwise cosine similarity. - similarity = pairwise_cosine_similarity( prompt_embed, embeddings ) + similarity = pairwise_cosine_similarity(prompt_embed, embeddings) - return torch.abs(similarity) \ No newline at end of file + return torch.abs(similarity) diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index fb2d7e3..edcb667 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -20,16 +20,24 @@ from typing import List from abc import abstractmethod -class BaseRewardModel: +class BaseRewardModel: @property @abstractmethod - def name(self) -> str: ... - def __str__(self) -> str: return str(self.name) - def __repr__(self) -> str: return str(self.name) + def name(self) -> str: + ... 
+ + def __str__(self) -> str: + return str(self.name) + + def __repr__(self) -> str: + return str(self.name) @abstractmethod - def get_rewards( self, prompt: str, completion: List[str], name: str ) -> torch.FloatTensor: ... + def get_rewards( + self, prompt: str, completion: List[str], name: str + ) -> torch.FloatTensor: + ... def __init__(self) -> None: self.count = 0 @@ -37,21 +45,21 @@ def __init__(self) -> None: self.var = 0.0 self.count_limit = 3000 - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: """ - This method normalizes the given rewards by updating the moving mean and variance statistics. The rewards are first standardized, and then scaled to the 0-1 range using a cumulative distribution function (CDF) to ensure they're in a comparable range across different environments. + This method normalizes the given rewards by updating the moving mean and variance statistics. The rewards are first standardized, and then scaled to the 0-1 range using a cumulative distribution function (CDF) to ensure they're in a comparable range across different environments. - Args: - rewards (torch.FloatTensor): The reward values to be normalized. + Args: + rewards (torch.FloatTensor): The reward values to be normalized. - Returns: - torch.FloatTensor: The normalized reward values. + Returns: + torch.FloatTensor: The normalized reward values. - Note: - - This function uses Welford's online algorithm to update the mean and variance. - - It standardizes the reward values using the updated mean and variance. - - It then scales the standardized values to the 0-1 range using the error function (erf) as a CDF. - """ + Note: + - This function uses Welford's online algorithm to update the mean and variance. + - It standardizes the reward values using the updated mean and variance. + - It then scales the standardized values to the 0-1 range using the error function (erf) as a CDF. + """ # Get the number of rewards (successful responses). new_count = rewards.numel() @@ -71,7 +79,11 @@ def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: # Update the old mean with the new mean and weights. self.mean = new_weight * new_mean + old_weight * self.mean # Update the old variance with the new variance and weights, and adjusting for the difference in means. - self.var = (new_weight * new_var) + (old_weight * self.var) + (new_weight * old_weight) * diff * diff + self.var = ( + (new_weight * new_var) + + (old_weight * self.var) + + (new_weight * old_weight) * diff * diff + ) # Update the old count with the new count, but don't exceed the limit. self.count = min(self.count_limit, self.count + new_count) @@ -80,32 +92,45 @@ def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: if self.var > 0: rewards /= torch.sqrt(self.var) # Scale the standardized rewards to the range [0, 1] using the error function as a cumulative distribution function (CDF). - rewards = 0.5 * (1 + torch.erf(rewards / torch.sqrt(torch.tensor([2.0])).to(rewards.device))) + rewards = 0.5 * ( + 1 + torch.erf(rewards / torch.sqrt(torch.tensor([2.0])).to(rewards.device)) + ) return rewards - def apply( self, prompt: str, responses: List[ bt.Synapse ], name: str) -> torch.FloatTensor: - """ Applies the reward model across each call. Unsuccessful responses are zeroed. 
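# A minimal sketch of the reward normalization described above: a running
# mean and variance are maintained with Welford-style weighted updates,
# incoming rewards are standardized against them, and the standard normal CDF
# (via erf) maps the result into [0, 1]. The sample rewards, starting
# statistics, and count limit are illustrative.
import torch

mean, var, count, count_limit = 0.0, 0.0, 0, 3000
rewards = torch.tensor([0.2, 0.8, 1.5, -0.3])

new_count = rewards.numel()
new_mean, new_var = rewards.mean().item(), rewards.var(dim=0).item()
new_weight = new_count / (count + new_count)
old_weight = count / (count + new_count)
diff = new_mean - mean
mean = new_weight * new_mean + old_weight * mean
var = new_weight * new_var + old_weight * var + new_weight * old_weight * diff * diff
count = min(count_limit, count + new_count)

standardized = rewards - mean
if var > 0:
    standardized = standardized / torch.sqrt(torch.tensor(var))
normalized = 0.5 * (1 + torch.erf(standardized / torch.sqrt(torch.tensor(2.0))))
print(normalized)   # every value now lies in [0, 1]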
- """ + def apply( + self, prompt: str, responses: List[bt.Synapse], name: str + ) -> torch.FloatTensor: + """Applies the reward model across each call. Unsuccessful responses are zeroed.""" # Get indices of correctly responding calls. - - successful_completions_indices: List[int] = [ idx for idx, resp in enumerate(responses) if resp.dendrite.status_code == 200 ] + + successful_completions_indices: List[int] = [ + idx + for idx, resp in enumerate(responses) + if resp.dendrite.status_code == 200 + ] # Get all completions from responding calls. - successful_completions: List[str] = [ responses[idx].completion.strip() for idx in successful_completions_indices] + successful_completions: List[str] = [ + responses[idx].completion.strip() for idx in successful_completions_indices + ] # Reward each completion. - successful_rewards = self.get_rewards( prompt, successful_completions, name ) + successful_rewards = self.get_rewards(prompt, successful_completions, name) # Softmax rewards across samples. - successful_rewards_normalized = self.normalize_rewards( successful_rewards ) + successful_rewards_normalized = self.normalize_rewards(successful_rewards) # Init zero rewards for all calls. - filled_rewards = torch.ones( len( responses ), dtype=torch.float32) * torch.nan - filled_rewards_normalized = torch.zeros( len( responses ), dtype=torch.float32) + filled_rewards = torch.ones(len(responses), dtype=torch.float32) * torch.nan + filled_rewards_normalized = torch.zeros(len(responses), dtype=torch.float32) # Fill reward tensor. - for idx, reward, reward_normalized in zip(successful_completions_indices, successful_rewards, successful_rewards_normalized): + for idx, reward, reward_normalized in zip( + successful_completions_indices, + successful_rewards, + successful_rewards_normalized, + ): filled_rewards[idx] = reward filled_rewards_normalized[idx] = reward_normalized @@ -113,18 +138,18 @@ def apply( self, prompt: str, responses: List[ bt.Synapse ], name: str) -> torch return filled_rewards, filled_rewards_normalized -class MockRewardModel( BaseRewardModel ): - +class MockRewardModel(BaseRewardModel): @property - def name(self) -> str: return self.mock_name + def name(self) -> str: + return self.mock_name - def __init__(self, mock_name: str = 'MockReward'): + def __init__(self, mock_name: str = "MockReward"): super().__init__() self.mock_name = mock_name - def apply( self, prompt: str, completion: List[str], name: str ) -> torch.FloatTensor: - mock_reward = torch.tensor( [0 for _ in completion], dtype=torch.float32 ) + def apply(self, prompt: str, completion: List[str], name: str) -> torch.FloatTensor: + mock_reward = torch.tensor([0 for _ in completion], dtype=torch.float32) return mock_reward, mock_reward def reset(self): - return self \ No newline at end of file + return self diff --git a/prompting/validators/reward/task_validator.py b/prompting/validators/reward/task_validator.py index e9dfb77..7225a3c 100644 --- a/prompting/validators/reward/task_validator.py +++ b/prompting/validators/reward/task_validator.py @@ -20,44 +20,61 @@ from .reward import BaseRewardModel -class TaskValidator( BaseRewardModel ): - +class TaskValidator(BaseRewardModel): @property - def name(self) -> str: return RewardModelType.task_validator.value + def name(self) -> str: + return RewardModelType.task_validator.value def __init__(self): super().__init__() - def reward( self, prompt: str, completion: str, name: str ) -> float: - summary_keywords = ['Summary:', 'Paraphrase:', 'Paraphrasing:', 'Paraphrased:'] - 
question_keywords = ['Question:', 'Query:', 'Q:'] - answer_keywords = ['Answer:', 'Response:', 'A:', 'Completion:'] - - completion_contains_answer = any(answer_keyword.lower() in completion.lower() for answer_keyword in answer_keywords) - completion_contains_question = any(question_keyword.lower() in completion.lower() for question_keyword in question_keywords) - completion_contains_summary = any(summary_keyword.lower() in completion.lower() for summary_keyword in summary_keywords) - - is_summarization_prompt = name == 'augment' - is_question_prompt = name.startswith('followup') - is_answer_prompt = name.startswith('answer') - - if (is_summarization_prompt or is_question_prompt) and completion_contains_answer: + def reward(self, prompt: str, completion: str, name: str) -> float: + summary_keywords = ["Summary:", "Paraphrase:", "Paraphrasing:", "Paraphrased:"] + question_keywords = ["Question:", "Query:", "Q:"] + answer_keywords = ["Answer:", "Response:", "A:", "Completion:"] + + completion_contains_answer = any( + answer_keyword.lower() in completion.lower() + for answer_keyword in answer_keywords + ) + completion_contains_question = any( + question_keyword.lower() in completion.lower() + for question_keyword in question_keywords + ) + completion_contains_summary = any( + summary_keyword.lower() in completion.lower() + for summary_keyword in summary_keywords + ) + + is_summarization_prompt = name == "augment" + is_question_prompt = name.startswith("followup") + is_answer_prompt = name.startswith("answer") + + if ( + is_summarization_prompt or is_question_prompt + ) and completion_contains_answer: return 0.0 - if (is_summarization_prompt or is_answer_prompt) and completion_contains_question: + if ( + is_summarization_prompt or is_answer_prompt + ) and completion_contains_question: return 0.0 if not is_summarization_prompt and completion_contains_summary: - return 0.0 + return 0.0 return 1 - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32) + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> torch.FloatTensor: + return torch.tensor( + [self.reward(prompt, completion, name) for completion in completions], + dtype=torch.float32, + ) - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards def reset(self): pass - diff --git a/prompting/validators/utils.py b/prompting/validators/utils.py index 4f419a4..1961a2c 100644 --- a/prompting/validators/utils.py +++ b/prompting/validators/utils.py @@ -27,15 +27,21 @@ def should_reinit_wandb(self): # Check if wandb run needs to be rolled over. 
- return not self.config.wandb.off and self.step and self.step % self.config.wandb.run_step_length == 0 + return ( + not self.config.wandb.off + and self.step + and self.step % self.config.wandb.run_step_length == 0 + ) def init_wandb(self, reinit=False): """Starts a new wandb run.""" - tags = [self.wallet.hotkey.ss58_address, - validators.__version__, - str(validators.__spec_version__), - f'netuid_{self.metagraph.netuid}'] + tags = [ + self.wallet.hotkey.ss58_address, + validators.__version__, + str(validators.__spec_version__), + f"netuid_{self.metagraph.netuid}", + ] if self.config.mock: tags.append("mock") @@ -49,8 +55,11 @@ def init_wandb(self, reinit=False): if self.config.neuron.disable_log_rewards: tags.append("disable_log_rewards") - wandb_config = {key: copy.deepcopy(self.config.get(key, None)) for key in ('neuron', 'reward', 'netuid', 'wandb')} - wandb_config['neuron'].pop('full_path', None) + wandb_config = { + key: copy.deepcopy(self.config.get(key, None)) + for key in ("neuron", "reward", "netuid", "wandb") + } + wandb_config["neuron"].pop("full_path", None) self.wandb = wandb.init( anonymous="allow", @@ -90,7 +99,7 @@ def checkpoint(self): save_state(self) -def resync_metagraph(self: 'validators.neuron.neuron'): +def resync_metagraph(self: "validators.neuron.neuron"): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" bt.logging.info("resync_metagraph()") @@ -104,7 +113,9 @@ def resync_metagraph(self: 'validators.neuron.neuron'): metagraph_axon_info_updated = previous_metagraph.axons != self.metagraph.axons if metagraph_axon_info_updated: - bt.logging.info("Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages") + bt.logging.info( + "Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages" + ) # Zero out all hotkeys that have been replaced. for uid, hotkey in enumerate(self.hotkeys): @@ -125,8 +136,6 @@ def resync_metagraph(self: 'validators.neuron.neuron'): # Update the hotkeys. self.hotkeys = copy.deepcopy(self.metagraph.hotkeys) - - def resync_linear_layer( @@ -142,8 +151,12 @@ def resync_linear_layer( metagraph (:obj: bt.metagraph.Metagraph): Latest state of the metagraph with updated uids and hotkeys """ - uids_hotkeys_state_dict = dict(zip(previous_metagraph.uids.tolist(), previous_metagraph.hotkeys)) - latest_uids_hotkeys_state_dict = dict(zip(metagraph.uids.tolist(), metagraph.hotkeys)) + uids_hotkeys_state_dict = dict( + zip(previous_metagraph.uids.tolist(), previous_metagraph.hotkeys) + ) + latest_uids_hotkeys_state_dict = dict( + zip(metagraph.uids.tolist(), metagraph.hotkeys) + ) updated_uids_indices = [] for uid, latest_hotkey in latest_uids_hotkeys_state_dict.items(): @@ -162,7 +175,9 @@ def resync_linear_layer( linear_layer.weight[index].data.copy_(reinitialized_weights) -def check_uid_availability(metagraph: "bt.metagraph.Metagraph", uid: int, vpermit_tao_limit: int) -> bool: +def check_uid_availability( + metagraph: "bt.metagraph.Metagraph", uid: int, vpermit_tao_limit: int +) -> bool: """Check if uid is available. 
The UID should be available if it is serving and has less than vpermit_tao_limit stake Args: metagraph (:obj: bt.metagraph.Metagraph): Metagraph object @@ -187,7 +202,7 @@ def save_state(self): bt.logging.info("save_state()") try: neuron_state_dict = { - "neuron_weights": self.moving_averaged_scores.to('cpu').tolist(), + "neuron_weights": self.moving_averaged_scores.to("cpu").tolist(), "neuron_hotkeys": self.hotkeys, } torch.save(neuron_state_dict, f"{self.config.neuron.full_path}/model.torch") @@ -206,24 +221,30 @@ def save_state(self): torch.save(gating_model_linear_layer_dict, gating_model_file_path) if not self.config.wandb.off: - wandb.log({ - "step": self.step, - "block": ttl_get_block(self), - **neuron_state_dict - }) + wandb.log( + {"step": self.step, "block": ttl_get_block(self), **neuron_state_dict} + ) if not self.config.wandb.off and self.config.wandb.track_gating_model: - model_artifact = wandb.Artifact(f"{gating_model_name}_gating_linear_layer", type="model") + model_artifact = wandb.Artifact( + f"{gating_model_name}_gating_linear_layer", type="model" + ) model_artifact.add_file(gating_model_file_path) self.wandb.log_artifact(model_artifact) - bt.logging.success(prefix="Saved gating model", sufix=f"{gating_model_file_path}") + bt.logging.success( + prefix="Saved gating model", sufix=f"{gating_model_file_path}" + ) except Exception as e: bt.logging.warning(f"Failed to save gating model with error: {e}") try: # Save diversity model. - diversity_model_dict = {"historic_embeddings": self.diversity_model.historic_embeddings.to('cpu')} - diversity_model_file_path = f"{self.config.neuron.full_path}/diversity_model.pth" + diversity_model_dict = { + "historic_embeddings": self.diversity_model.historic_embeddings.to("cpu") + } + diversity_model_file_path = ( + f"{self.config.neuron.full_path}/diversity_model.pth" + ) torch.save(diversity_model_dict, diversity_model_file_path) bt.logging.success( prefix="Saved diversity model", @@ -255,9 +276,13 @@ def load_state(self): try: # Load diversity model. - diversity_model_file_path = f"{self.config.neuron.full_path}/diversity_model.pth" + diversity_model_file_path = ( + f"{self.config.neuron.full_path}/diversity_model.pth" + ) diversity_model_dict = torch.load(diversity_model_file_path) - self.diversity_model.historic_embeddings = diversity_model_dict["historic_embeddings"].to(self.device) + self.diversity_model.historic_embeddings = diversity_model_dict[ + "historic_embeddings" + ].to(self.device) bt.logging.success( prefix="Reloaded diversity model", sufix=f"{diversity_model_file_path} {list(self.diversity_model.historic_embeddings.shape)}", diff --git a/prompting/validators/weights.py b/prompting/validators/weights.py index 6875fc0..352a5f4 100644 --- a/prompting/validators/weights.py +++ b/prompting/validators/weights.py @@ -29,7 +29,10 @@ def should_set_weights(self) -> bool: if self.config.neuron.disable_set_weights: return False - return ttl_get_block(self) % self.config.neuron.epoch_length < self.prev_block % self.config.neuron.epoch_length + return ( + ttl_get_block(self) % self.config.neuron.epoch_length + < self.prev_block % self.config.neuron.epoch_length + ) def set_weights(self): @@ -41,7 +44,10 @@ def set_weights(self): bt.logging.trace("top10 uids", raw_weights.sort()[1]) # Process the raw weights to final_weights via subtensor limitations. 
- (processed_weight_uids, processed_weights,) = bt.utils.weight_utils.process_weights_for_netuid( + ( + processed_weight_uids, + processed_weights, + ) = bt.utils.weight_utils.process_weights_for_netuid( uids=self.metagraph.uids.to("cpu"), weights=raw_weights.to("cpu"), netuid=self.config.netuid, From 9ec76c83831806fa13e35dc83c6f945a0ae06a91 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Tue, 12 Sep 2023 01:32:09 +0000 Subject: [PATCH 3/8] pull in baseminer to prompting/, move attach() to setup so logging shows correct route --- baseminer/miner.py | 149 ------------------ neurons/bittensorLM/bittensorlm.py | 2 +- neurons/template.py | 2 +- .../baseminer}/__init__.py | 2 +- .../baseminer}/blacklist.py | 0 {baseminer => prompting/baseminer}/config.py | 0 {baseminer => prompting/baseminer}/forward.py | 0 .../baseminer/miner.py | 58 ++++++- {baseminer => prompting/baseminer}/mock.py | 0 .../baseminer}/priority.py | 0 {baseminer => prompting/baseminer}/run.py | 8 - .../baseminer}/set_weights.py | 0 prompting/protocol.py | 4 - 13 files changed, 54 insertions(+), 171 deletions(-) delete mode 100644 baseminer/miner.py rename {baseminer => prompting/baseminer}/__init__.py (97%) rename {baseminer => prompting/baseminer}/blacklist.py (100%) rename {baseminer => prompting/baseminer}/config.py (100%) rename {baseminer => prompting/baseminer}/forward.py (100%) rename baseminer/revolution_miner.py => prompting/baseminer/miner.py (79%) rename {baseminer => prompting/baseminer}/mock.py (100%) rename {baseminer => prompting/baseminer}/priority.py (100%) rename {baseminer => prompting/baseminer}/run.py (94%) rename {baseminer => prompting/baseminer}/set_weights.py (100%) diff --git a/baseminer/miner.py b/baseminer/miner.py deleted file mode 100644 index 3a204d3..0000000 --- a/baseminer/miner.py +++ /dev/null @@ -1,149 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import copy -import wandb -import argparse -import threading -import bittensor as bt - -from abc import ABC, abstractmethod -from typing import List, Dict, Union, Tuple - -from .run import run -from .mock import MockSubtensor -from .config import check_config, get_config - - -class BaseMiner(ABC): - @classmethod - def config(cls) -> "bt.Config": - return get_config() - - @classmethod - @abstractmethod - def add_args(cls, parser: argparse.ArgumentParser): - ... - - @abstractmethod - def forward(self, messages: List[Dict[str, str]]) -> str: - ... 
- - def priority(self, forward_call: "bt.TextPromptingForwardCall") -> float: - raise NotImplementedError("priority not implemented in subclass") - - def blacklist( - self, forward_call: "bt.TextPromptingForwardCall" - ) -> Union[Tuple[bool, str], bool]: - raise NotImplementedError("blacklist not implemented in subclass") - - def __init__( - self, - config: "bt.Config" = None, - axon: "bt.axon" = None, - wallet: "bt.Wallet" = None, - subtensor: "bt.Subtensor" = None, - ): - # Instantiate and check configs. - # Grab super config. - super_config = copy.deepcopy(config or BaseMiner.config()) - - # Grab child config - self.config = self.config() - - # Merge them, but overwrite from the child config. - self.config.merge(super_config) - check_config(BaseMiner, self.config) - - # Instantiate prompt cache where key is the encoded prompt and value is a tuple of hotkey and block - self.prompt_cache: Dict[str, Tuple[str, int]] = {} - - # Instantiate logging. - bt.logging(config=self.config, logging_dir=self.config.miner.full_path) - - # Warn if blacklist checking is turned off. - if ( - not self.config.miner.blacklist.force_validator_permit - or self.config.miner.blacklist.allow_non_registered - ): - bt.logging.warning( - "Blacklist protections are disabled! " - f"Force Validator Permit: {self.config.miner.blacklist.force_validator_permit}, " - f"Allow Non-Registered: {self.config.miner.blacklist.allow_non_registered}" - ) - - # Instantiate subtensor. - if self.config.miner.mock_subtensor: - self.subtensor = subtensor or MockSubtensor(self.config) - else: - self.subtensor = subtensor or bt.subtensor(self.config) - - # Instantiate metagraph. - self.metagraph = self.subtensor.metagraph(self.config.netuid) - self.metagraph.sync(lite=True, subtensor=self.subtensor) - - # Instantiate wallet. - self.wallet = wallet or bt.wallet(self.config) - - # Instantiate axon. - self.axon = axon or bt.axon( - wallet=self.wallet, metagraph=self.metagraph, config=self.config - ) - - # Init wandb. - if self.config.wandb.on: - tags = [self.wallet.hotkey.ss58_address, f"netuid_{self.config.netuid}"] - self.wandb_run = wandb.init( - project=self.config.wandb.project_name, - entity=self.config.wandb.entity, - config=self.config, - mode="online" if self.config.wandb.on else "offline", - dir=self.config.miner.full_path, - magic=True, - tags=tags, - ) - # Instantiate runners. 
- self.should_exit: bool = False - self.is_running: bool = False - self.thread: threading.Thread = None - - self.request_timestamps = {} - - def run(self): - run(self) - - def run_in_background_thread(self): - if not self.is_running: - bt.logging.debug(f"Starting miner background thread") - self.should_exit = False - self.thread = threading.Thread(target=self.run, daemon=True) - self.thread.start() - self.is_running = True - bt.logging.debug(f"Started") - - def stop_run_thread(self): - if self.is_running: - bt.logging.debug(f"Stopping miner background thread...") - self.should_exit = True - self.thread.join(5) - bt.logging.debug(f"Stopped") - - def __enter__(self): - self.run_in_background_thread() - - def __exit__(self, exc_type, exc_value, traceback): - self.stop_run_thread() diff --git a/neurons/bittensorLM/bittensorlm.py b/neurons/bittensorLM/bittensorlm.py index 1c95409..5cdd9f9 100644 --- a/neurons/bittensorLM/bittensorlm.py +++ b/neurons/bittensorLM/bittensorlm.py @@ -32,7 +32,7 @@ pipeline, ) -from baseminer.revolution_miner import Miner +from prompting.baseminer.miner import Miner from prompting.protocol import Prompting diff --git a/neurons/template.py b/neurons/template.py index a192d73..4552762 100644 --- a/neurons/template.py +++ b/neurons/template.py @@ -19,7 +19,7 @@ # Bittensor Miner Template:# Step 1: Import necessary libraries and modules -from miners.revolution_miner import Miner +from prompting.baseminer.miner import Miner from prompting.protocol import Prompting import bittensor as bt diff --git a/baseminer/__init__.py b/prompting/baseminer/__init__.py similarity index 97% rename from baseminer/__init__.py rename to prompting/baseminer/__init__.py index d730dc1..766410a 100644 --- a/baseminer/__init__.py +++ b/prompting/baseminer/__init__.py @@ -19,7 +19,7 @@ from . import blacklist from . import config from . import forward -from . import revolution_miner as miner +from . import miner from . import mock from . import priority from . import run diff --git a/baseminer/blacklist.py b/prompting/baseminer/blacklist.py similarity index 100% rename from baseminer/blacklist.py rename to prompting/baseminer/blacklist.py diff --git a/baseminer/config.py b/prompting/baseminer/config.py similarity index 100% rename from baseminer/config.py rename to prompting/baseminer/config.py diff --git a/baseminer/forward.py b/prompting/baseminer/forward.py similarity index 100% rename from baseminer/forward.py rename to prompting/baseminer/forward.py diff --git a/baseminer/revolution_miner.py b/prompting/baseminer/miner.py similarity index 79% rename from baseminer/revolution_miner.py rename to prompting/baseminer/miner.py index 5b9e369..1d568bb 100644 --- a/baseminer/revolution_miner.py +++ b/prompting/baseminer/miner.py @@ -30,18 +30,18 @@ import bittensor as bt from prompting.protocol import Prompting -from baseminer.priority import priority -from baseminer.blacklist import blacklist -from baseminer.run import run -from baseminer.set_weights import set_weights -from baseminer.config import check_config, get_config +from prompting.baseminer.priority import priority +from prompting.baseminer.blacklist import blacklist +from prompting.baseminer.run import run +from prompting.baseminer.set_weights import set_weights +from prompting.baseminer.config import check_config, get_config class Miner(ABC): """ The Miner class is an abstract base class that defines the structure for Bittensor miners. - Subclassed should implement the `prompt` method to define their own response logic. 
+ Subclasses should implement the `prompt` method to define their own response logic. The `blacklist` and `priority` methods can also be overridden to provide custom logic. """ @@ -108,7 +108,14 @@ def __init__(self, config=None, axon=None, wallet=None, subtensor=None): # The axon handles request processing, allowing validators to send this process requests. self.axon = axon or bt.axon(wallet=self.wallet, port=self.config.axon.port) - bt.logging.info(f"Axon {self.axon}") + # Attach determiners which functions are called when servicing a request. + bt.logging.info(f"Attaching forward function to axon.") + self.axon.attach( + forward_fn=self.prompt, + blacklist_fn=self.blacklist, + priority_fn=self.priority, + ) + bt.logging.info(f"Axon created: {self.axon}") if self.config.wandb.on: tags = [self.wallet.hotkey.ss58_address, f"netuid_{self.config.netuid}"] @@ -131,15 +138,52 @@ def __init__(self, config=None, axon=None, wallet=None, subtensor=None): @abstractmethod def config(self) -> "bt.Config": + """ + Abstract method for configuring the Miner. + + Subclasses should implement this method to return a configuration object that dictates + various settings and parameters for the miner's operation. The returned configuration + object will typically contain parameters like network settings, logging preferences, + and other operational parameters. + + Returns: + bt.Config: A configuration object specific to the miner subclass. + """ ... @classmethod @abstractmethod def add_args(cls, parser: argparse.ArgumentParser): + """ + Abstract class method to add miner-specific arguments to a command line parser. + + This method should be implemented by subclasses to introduce any command-line + arguments that the miner might require for operation. + + Args: + parser (argparse.ArgumentParser): The command line argument parser to which + the miner-specific arguments should be added. + """ ... @abstractmethod def prompt(self, synapse: Prompting) -> Prompting: + """ + Abstract method to handle and respond to incoming requests to the miner. + + Subclasses should implement this method to define how the miner processes + incoming requests and what responses should be sent back. The logic can include + operations like data processing, validation, or any other computation as required + by the specific mining operation. + + Args: + synapse (Prompting): The incoming request object encapsulating the details + of the request. This must contain `messages` and `roles` as fields. + + Returns: + Prompting: The response object that should be sent back in reply to the + incoming request. This is essentially the filled synapse request object. + """ ... def blacklist(self, synapse: Prompting) -> Tuple[bool, str]: diff --git a/baseminer/mock.py b/prompting/baseminer/mock.py similarity index 100% rename from baseminer/mock.py rename to prompting/baseminer/mock.py diff --git a/baseminer/priority.py b/prompting/baseminer/priority.py similarity index 100% rename from baseminer/priority.py rename to prompting/baseminer/priority.py diff --git a/baseminer/run.py b/prompting/baseminer/run.py similarity index 94% rename from baseminer/run.py rename to prompting/baseminer/run.py index f24fdaf..d5a66b9 100644 --- a/baseminer/run.py +++ b/prompting/baseminer/run.py @@ -54,14 +54,6 @@ def run(self): ) self.subtensor.register(netuid=self.config.netuid, wallet=self.wallet) - # Attach determiners which functions are called when servicing a request. 
- bt.logging.info(f"Attaching forward function to axon.") - self.axon.attach( - forward_fn=self.prompt, # TODO: make sure this is the subclass's prompt. - blacklist_fn=self.blacklist, - priority_fn=self.priority, - ) - # Serve passes the axon information to the network + netuid we are hosting on. # This will auto-update if the axon port of external ip have changed. bt.logging.info( diff --git a/baseminer/set_weights.py b/prompting/baseminer/set_weights.py similarity index 100% rename from baseminer/set_weights.py rename to prompting/baseminer/set_weights.py diff --git a/prompting/protocol.py b/prompting/protocol.py index eb2b79d..e83337f 100644 --- a/prompting/protocol.py +++ b/prompting/protocol.py @@ -31,10 +31,6 @@ class Config: def deserialize(self): return self - # @property - # def required_hash_fields(self) -> List[str]: - # return ['messages'] - roles: List[str] = pydantic.Field(..., allow_mutation=False) messages: List[str] = pydantic.Field(..., allow_mutation=False) completion: str = "" From 1881ea8be04764d574caf806f3e07323cc5d45d3 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Tue, 12 Sep 2023 01:33:47 +0000 Subject: [PATCH 4/8] change k back to varible value --- prompting/validators/forward.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/forward.py b/prompting/validators/forward.py index 37f1308..716f106 100644 --- a/prompting/validators/forward.py +++ b/prompting/validators/forward.py @@ -86,7 +86,7 @@ async def run_step( event = {"name": name} start_time = time.time() # Get the list of uids to query for this step. - uids = get_random_uids(self, k=10, exclude=exclude).to(self.device) + uids = get_random_uids(self, k=k, exclude=exclude).to(self.device) axons = [self.metagraph.axons[uid] for uid in uids] synapse = prompting.protocol.Prompting(roles=["user"], messages=[prompt]) From e74dd0988f82a4cb46738d58d01df284f86c4dab Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Tue, 12 Sep 2023 01:47:31 +0000 Subject: [PATCH 5/8] add openai miner --- neurons/openai/README.md | 149 ++++++++++++++++++++++++++++++++ neurons/openai/miner.py | 116 +++++++++++++++++++++++++ neurons/openai/requirements.txt | 1 + 3 files changed, 266 insertions(+) create mode 100644 neurons/openai/README.md create mode 100644 neurons/openai/miner.py create mode 100644 neurons/openai/requirements.txt diff --git a/neurons/openai/README.md b/neurons/openai/README.md new file mode 100644 index 0000000..9cb0226 --- /dev/null +++ b/neurons/openai/README.md @@ -0,0 +1,149 @@ +# OpenAI Bittensor Miner +This repository contains a Bittensor Miner that uses OpenAI's GPT-3.5-turbo model as its synapse. The miner connects to the Bittensor network, registers its wallet, and serves the GPT-3.5-turbo model to the network. + +## Prerequisites + +- Python 3.8+ +- OpenAI Python API (https://github.com/openai/openai) + +## Installation + +1. Clone the repository +2. Install the required packages with `pip install -r requirements.txt` +3. Ensure that you have your OpenAI key in your os environment variable +```bash +# Sets your openai key in os envs variable +export OPENAI_API_KEY='your_openai_key_here' + +# Verifies if openai key is set correctly +echo $OPENAI_API_KEY +``` + +For more configuration options related to the wallet, axon, subtensor, logging, and metagraph, please refer to the Bittensor documentation. 
+ +## Example Usage + +To run the OpenAI Bittensor Miner with default settings, use the following command: + +``` +python3 -m pip install -r openminers/text_to_text/miner/openai/requirements.txt +export OPENAI_API_KEY='sk-yourkey' +python3 openminers/text_to_text/miner/openai/miner.py +``` + +# Full Usage +``` +usage: miner.py [-h] [--openai.suffix OPENAI.SUFFIX] [--openai.max_tokens OPENAI.MAX_TOKENS] + [--openai.temperature OPENAI.TEMPERATURE] [--openai.top_p OPENAI.TOP_P] [--openai.n OPENAI.N] + [--openai.presence_penalty OPENAI.PRESENCE_PENALTY] [--openai.frequency_penalty OPENAI.FREQUENCY_PENALTY] + [--openai.model_name OPENAI.MODEL_NAME] [--netuid NETUID] [--miner.name NEURON.NAME] + [--miner.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--miner.no_set_weights] + [--miner.max_batch_size NEURON.MAX_BATCH_SIZE] [--miner.max_sequence_len NEURON.MAX_SEQUENCE_LEN] + [--miner.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--miner.blacklist.allow_non_registered] + [--miner.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--miner.default_priority NEURON.DEFAULT_PRIORITY] + [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] + [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] + [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] + [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] + [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] + [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] + [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] + [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] + [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] + [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] + [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] + [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] + +optional arguments: + -h, --help show this help message and exit + --openai.suffix OPENAI.SUFFIX + The suffix that comes after a completion of inserted text. + --openai.max_tokens OPENAI.MAX_TOKENS + The maximum number of tokens to generate in the completion. + --openai.temperature OPENAI.TEMPERATURE + Sampling temperature to use, between 0 and 2. + --openai.top_p OPENAI.TOP_P + Nucleus sampling parameter, top_p probability mass. + --openai.n OPENAI.N How many completions to generate for each prompt. + --openai.presence_penalty OPENAI.PRESENCE_PENALTY + Penalty for tokens based on their presence in the text so far. + --openai.frequency_penalty OPENAI.FREQUENCY_PENALTY + Penalty for tokens based on their frequency in the text so far. + --openai.model_name OPENAI.MODEL_NAME + OpenAI model to use for completion. + --netuid NETUID Subnet netuid + --miner.name NEURON.NAME + Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name + --miner.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH + Blocks until the miner sets weights on chain + --miner.no_set_weights + If True, the model does not set weights. + --miner.max_batch_size NEURON.MAX_BATCH_SIZE + The maximum batch size for forward requests. 
+ --miner.max_sequence_len NEURON.MAX_SEQUENCE_LEN + The maximum sequence length for forward requests. + --miner.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] + To blacklist certain hotkeys + --miner.blacklist.allow_non_registered + If True, the miner will allow non-registered hotkeys to mine. + --miner.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE + Set default stake for miners. + --miner.default_priority NEURON.DEFAULT_PRIORITY + Set default priority for miners. + --wallet.name WALLET.NAME + The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) + --wallet.hotkey WALLET.HOTKEY + The name of wallet's hotkey. + --wallet.path WALLET.PATH + The path to your bittensor wallets + --wallet._mock To turn on wallet mocking for testing purposes. + --wallet.reregister WALLET.REREGISTER + Whether to reregister the wallet if it is not already registered. + --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS + maximum number of threads in thread pool + --axon.priority.maxsize AXON.PRIORITY.MAXSIZE + maximum size of tasks in priority queue + --axon.port AXON.PORT + The local port this axon endpoint is bound to. i.e. 8091 + --axon.ip AXON.IP The local ip this axon binds to. ie. [::] + --axon.external_port AXON.EXTERNAL_PORT + The public port this axon broadcasts to the network. i.e. 8091 + --axon.external_ip AXON.EXTERNAL_IP + The external ip this axon broadcasts to the network to. ie. [::] + --axon.max_workers AXON.MAX_WORKERS + The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes + new worker threads to service requests up to this number. + --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS + Maximum number of allowed active connections + --subtensor.network SUBTENSOR.NETWORK + The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- + mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an + entry point node from that network. + --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT + The subtensor endpoint flag. If set, overrides the --network flag. + --subtensor._mock To turn on subtensor mocking for testing purposes. + --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES + Number of processors to use for registration + --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL + The number of nonces to process before checking for next block during registration + --subtensor.register.no_output_in_place, --no_output_in_place + Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. + --subtensor.register.verbose + Whether to ouput the registration statistics verbosely. + --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda + Set flag to use CUDA to register. + --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda + Set flag to not use CUDA for registration + --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] + Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). 
+ --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB + Set the number of Threads Per Block for CUDA. + --logging.debug Turn on bittensor debugging information + --logging.trace Turn on bittensor trace level information + --logging.record_log Turns on logging to file. + --logging.logging_dir LOGGING.LOGGING_DIR + Logging default root directory. + --metagraph._mock To turn on metagraph mocking for testing purposes. + --config CONFIG If set, defaults are overridden by passed file. + --strict If flagged, config will check that only exact arguemnts have been set. +``` \ No newline at end of file diff --git a/neurons/openai/miner.py b/neurons/openai/miner.py new file mode 100644 index 0000000..60dd5df --- /dev/null +++ b/neurons/openai/miner.py @@ -0,0 +1,116 @@ +# The MIT License (MIT) +# Copyright © 2021 Yuma Rao + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+import os +import time +import openai +import argparse +import bittensor +from typing import List, Dict, Optional + +from prompting.baseminer.miner import Miner +from prompting.protocol import Prompting + +class OpenAIMiner(Miner): + + @classmethod + def add_args(cls, parser: argparse.ArgumentParser): + parser.add_argument( + "--openai.suffix", + type=str, + default=None, + help="The suffix that comes after a completion of inserted text.", + ) + parser.add_argument( + "--openai.max_tokens", + type=int, + default=100, + help="The maximum number of tokens to generate in the completion.", + ) + parser.add_argument( + "--openai.temperature", + type=float, + default=0.4, + help="Sampling temperature to use, between 0 and 2.", + ) + parser.add_argument( + "--openai.top_p", + type=float, + default=1, + help="Nucleus sampling parameter, top_p probability mass.", + ) + parser.add_argument( + "--openai.n", + type=int, + default=1, + help="How many completions to generate for each prompt.", + ) + parser.add_argument( + "--openai.presence_penalty", + type=float, + default=0.1, + help="Penalty for tokens based on their presence in the text so far.", + ) + parser.add_argument( + "--openai.frequency_penalty", + type=float, + default=0.1, + help="Penalty for tokens based on their frequency in the text so far.", + ) + parser.add_argument( + "--openai.model_name", + type=str, + default="gpt-3.5-turbo", + help="OpenAI model to use for completion.", + ) + + def config(self) -> "bittensor.Config": + parser = argparse.ArgumentParser(description="OpenAI Miner Configs") + self.add_args(parser) + return bittensor.config(parser) + + def __init__(self, api_key: Optional[str] = None, *args, **kwargs): + super(OpenAIMiner, self).__init__(*args, **kwargs) + if api_key is None: + raise ValueError( + "OpenAI API key is None: the miner requires an `OPENAI_API_KEY` defined in the environment variables or as an direct argument into the constructor." 
+ ) + if self.config.wandb.on: + self.wandb_run.tags = self.wandb_run.tags + ("openai_miner",) + openai.api_key = api_key + + def prompt(self, synapse: Prompting) -> Prompting: + resp = openai.ChatCompletion.create( + model=self.config.openai.model_name, + messages=list(zip(synapse.roles, synapse.messages)), + temperature=self.config.openai.temperature, + max_tokens=self.config.openai.max_tokens, + top_p=self.config.openai.top_p, + frequency_penalty=self.config.openai.frequency_penalty, + presence_penalty=self.config.openai.presence_penalty, + n=self.config.openai.n, + )["choices"][0]["message"]["content"] + synapse.completion = resp + return synapse + + +if __name__ == "__main__": + openai_api_key = os.getenv("OPENAI_API_KEY") + + with OpenAIMiner(api_key=openai_api_key): + while True: + print("running...", time.time()) + time.sleep(1) \ No newline at end of file diff --git a/neurons/openai/requirements.txt b/neurons/openai/requirements.txt new file mode 100644 index 0000000..f0dd0ae --- /dev/null +++ b/neurons/openai/requirements.txt @@ -0,0 +1 @@ +openai \ No newline at end of file From d4ff3a4f0d6c474863ca99be8e5f09dafceac383 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Tue, 12 Sep 2023 01:50:21 +0000 Subject: [PATCH 6/8] change config from classmethod to abstract --- neurons/bittensorLM/bittensorlm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/neurons/bittensorLM/bittensorlm.py b/neurons/bittensorLM/bittensorlm.py index 5cdd9f9..7c09b89 100644 --- a/neurons/bittensorLM/bittensorlm.py +++ b/neurons/bittensorLM/bittensorlm.py @@ -50,10 +50,10 @@ def __call__( class CerebrasBTLMMiner(Miner): - @classmethod - def config(cls) -> "bt.Config": + + def config(self) -> "bt.Config": parser = argparse.ArgumentParser(description="Bittensor-LM Miner Config") - cls.add_args(parser) + self.add_args(parser) return bt.config(parser) @classmethod From 13f9b714d0bab56f8a7387c278bae1eab80c7306 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Tue, 12 Sep 2023 02:04:01 +0000 Subject: [PATCH 7/8] fix messages --- neurons/bittensorLM/bittensorlm.py | 1 - neurons/openai/miner.py | 12 ++++++++--- prompting/baseminer/miner.py | 33 +++++++++++++++--------------- prompting/validators/forward.py | 2 +- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/neurons/bittensorLM/bittensorlm.py b/neurons/bittensorLM/bittensorlm.py index 7c09b89..91885f7 100644 --- a/neurons/bittensorLM/bittensorlm.py +++ b/neurons/bittensorLM/bittensorlm.py @@ -50,7 +50,6 @@ def __call__( class CerebrasBTLMMiner(Miner): - def config(self) -> "bt.Config": parser = argparse.ArgumentParser(description="Bittensor-LM Miner Config") self.add_args(parser) diff --git a/neurons/openai/miner.py b/neurons/openai/miner.py index 60dd5df..064741f 100644 --- a/neurons/openai/miner.py +++ b/neurons/openai/miner.py @@ -24,8 +24,8 @@ from prompting.baseminer.miner import Miner from prompting.protocol import Prompting -class OpenAIMiner(Miner): +class OpenAIMiner(Miner): @classmethod def add_args(cls, parser: argparse.ArgumentParser): parser.add_argument( @@ -93,9 +93,14 @@ def __init__(self, api_key: Optional[str] = None, *args, **kwargs): openai.api_key = api_key def prompt(self, synapse: Prompting) -> Prompting: + messages = [ + {"role": role, "content": message} + for role, message in zip(synapse.roles, synapse.messages) + ] + bittensor.logging.debug(f"messages: {messages}") resp = openai.ChatCompletion.create( model=self.config.openai.model_name, - messages=list(zip(synapse.roles, synapse.messages)), + 
messages=messages, temperature=self.config.openai.temperature, max_tokens=self.config.openai.max_tokens, top_p=self.config.openai.top_p, @@ -104,6 +109,7 @@ def prompt(self, synapse: Prompting) -> Prompting: n=self.config.openai.n, )["choices"][0]["message"]["content"] synapse.completion = resp + bittensor.logging.debug(f"completion: {resp}") return synapse @@ -113,4 +119,4 @@ def prompt(self, synapse: Prompting) -> Prompting: with OpenAIMiner(api_key=openai_api_key): while True: print("running...", time.time()) - time.sleep(1) \ No newline at end of file + time.sleep(1) diff --git a/prompting/baseminer/miner.py b/prompting/baseminer/miner.py index 1d568bb..d074737 100644 --- a/prompting/baseminer/miner.py +++ b/prompting/baseminer/miner.py @@ -37,7 +37,6 @@ from prompting.baseminer.config import check_config, get_config - class Miner(ABC): """ The Miner class is an abstract base class that defines the structure for Bittensor miners. @@ -139,13 +138,13 @@ def __init__(self, config=None, axon=None, wallet=None, subtensor=None): @abstractmethod def config(self) -> "bt.Config": """ - Abstract method for configuring the Miner. - - Subclasses should implement this method to return a configuration object that dictates - various settings and parameters for the miner's operation. The returned configuration - object will typically contain parameters like network settings, logging preferences, + Abstract method for configuring the Miner. + + Subclasses should implement this method to return a configuration object that dictates + various settings and parameters for the miner's operation. The returned configuration + object will typically contain parameters like network settings, logging preferences, and other operational parameters. - + Returns: bt.Config: A configuration object specific to the miner subclass. """ @@ -156,12 +155,12 @@ def config(self) -> "bt.Config": def add_args(cls, parser: argparse.ArgumentParser): """ Abstract class method to add miner-specific arguments to a command line parser. - - This method should be implemented by subclasses to introduce any command-line + + This method should be implemented by subclasses to introduce any command-line arguments that the miner might require for operation. Args: - parser (argparse.ArgumentParser): The command line argument parser to which + parser (argparse.ArgumentParser): The command line argument parser to which the miner-specific arguments should be added. """ ... @@ -170,18 +169,18 @@ def add_args(cls, parser: argparse.ArgumentParser): def prompt(self, synapse: Prompting) -> Prompting: """ Abstract method to handle and respond to incoming requests to the miner. - - Subclasses should implement this method to define how the miner processes - incoming requests and what responses should be sent back. The logic can include - operations like data processing, validation, or any other computation as required + + Subclasses should implement this method to define how the miner processes + incoming requests and what responses should be sent back. The logic can include + operations like data processing, validation, or any other computation as required by the specific mining operation. - + Args: - synapse (Prompting): The incoming request object encapsulating the details + synapse (Prompting): The incoming request object encapsulating the details of the request. This must contain `messages` and `roles` as fields. 
Returns: - Prompting: The response object that should be sent back in reply to the + Prompting: The response object that should be sent back in reply to the incoming request. This is essentially the filled synapse request object. """ ... diff --git a/prompting/validators/forward.py b/prompting/validators/forward.py index 716f106..37f1308 100644 --- a/prompting/validators/forward.py +++ b/prompting/validators/forward.py @@ -86,7 +86,7 @@ async def run_step( event = {"name": name} start_time = time.time() # Get the list of uids to query for this step. - uids = get_random_uids(self, k=k, exclude=exclude).to(self.device) + uids = get_random_uids(self, k=10, exclude=exclude).to(self.device) axons = [self.metagraph.axons[uid] for uid in uids] synapse = prompting.protocol.Prompting(roles=["user"], messages=[prompt]) From 2ec78e1ed6b05619873eb83bda473f657437abe2 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Tue, 12 Sep 2023 16:36:17 +0000 Subject: [PATCH 8/8] add docstrings and update readme --- neurons/openai/README.md | 170 +++++++++++++++------------------------ neurons/openai/miner.py | 75 +++++++++++++++++ 2 files changed, 141 insertions(+), 104 deletions(-) diff --git a/neurons/openai/README.md b/neurons/openai/README.md index 9cb0226..388c434 100644 --- a/neurons/openai/README.md +++ b/neurons/openai/README.md @@ -1,5 +1,5 @@ # OpenAI Bittensor Miner -This repository contains a Bittensor Miner that uses OpenAI's GPT-3.5-turbo model as its synapse. The miner connects to the Bittensor network, registers its wallet, and serves the GPT-3.5-turbo model to the network. +This repository contains a Bittensor Miner that uses OpenAI's GPT-3.5-turbo model as its synapse. The miner connects to the Bittensor network, registers its wallet, and serves the GPT-3.5-turbo model to the network by attaching the prompt function to the axon. 
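The attaching happens in the shared base class (`prompting/baseminer/miner.py`, changed earlier in this series), so a subclass only has to supply `config`, `add_args`, and `prompt`. As a toy sketch, not part of this patch, a miner that returns a canned reply instead of calling OpenAI would look roughly like this:

```python
import argparse
import time

import bittensor as bt

from prompting.baseminer.miner import Miner
from prompting.protocol import Prompting


class StaticMiner(Miner):
    """Toy example: the base class attaches prompt() to the axon on our behalf."""

    @classmethod
    def add_args(cls, parser: argparse.ArgumentParser):
        pass  # no extra command-line flags for this toy miner

    def config(self) -> "bt.Config":
        parser = argparse.ArgumentParser(description="Static Miner Configs")
        self.add_args(parser)
        return bt.config(parser)

    def prompt(self, synapse: Prompting) -> Prompting:
        # Fill in the completion field; the axon sends the filled synapse back.
        synapse.completion = "Hello from a toy miner."
        return synapse


if __name__ == "__main__":
    # Entering the context manager runs the miner loop in a background thread.
    with StaticMiner():
        while True:
            time.sleep(12)
```

`OpenAIMiner` in `miner.py` follows exactly this shape, with `prompt` delegating to `openai.ChatCompletion.create`.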
## Prerequisites @@ -28,122 +28,84 @@ To run the OpenAI Bittensor Miner with default settings, use the following comma ``` python3 -m pip install -r openminers/text_to_text/miner/openai/requirements.txt export OPENAI_API_KEY='sk-yourkey' -python3 openminers/text_to_text/miner/openai/miner.py +python3 neurons/miners/openai/miner.py ``` # Full Usage ``` -usage: miner.py [-h] [--openai.suffix OPENAI.SUFFIX] [--openai.max_tokens OPENAI.MAX_TOKENS] - [--openai.temperature OPENAI.TEMPERATURE] [--openai.top_p OPENAI.TOP_P] [--openai.n OPENAI.N] - [--openai.presence_penalty OPENAI.PRESENCE_PENALTY] [--openai.frequency_penalty OPENAI.FREQUENCY_PENALTY] - [--openai.model_name OPENAI.MODEL_NAME] [--netuid NETUID] [--miner.name NEURON.NAME] - [--miner.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--miner.no_set_weights] - [--miner.max_batch_size NEURON.MAX_BATCH_SIZE] [--miner.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--miner.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--miner.blacklist.allow_non_registered] - [--miner.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--miner.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] +usage: miner.py [-h] [--axon.port AXON.PORT] [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--netuid NETUID] [--miner.root MINER.ROOT] [--miner.name MINER.NAME] + [--miner.blocks_per_epoch MINER.BLOCKS_PER_EPOCH] [--miner.blacklist.blacklist [MINER.BLACKLIST.BLACKLIST ...]] [--miner.blacklist.whitelist [MINER.BLACKLIST.WHITELIST ...]] + [--miner.blacklist.force_validator_permit] [--miner.blacklist.allow_non_registered] [--miner.blacklist.minimum_stake_requirement MINER.BLACKLIST.MINIMUM_STAKE_REQUIREMENT] + [--miner.blacklist.prompt_cache_block_span MINER.BLACKLIST.PROMPT_CACHE_BLOCK_SPAN] [--miner.blacklist.min_request_period MINER.BLACKLIST.MIN_REQUEST_PERIOD] [--miner.priority.default MINER.PRIORITY.DEFAULT] + [--miner.priority.use_s MINER.PRIORITY.USE_S] [--miner.priority.time_stake_multiplicate MINER.PRIORITY.TIME_STAKE_MULTIPLICATE] [--miner.priority.len_request_timestamps MINER.PRIORITY.LEN_REQUEST_TIMESTAMPS] + [--miner.no_set_weights] [--miner.no_serve] [--miner.no_start_axon] [--miner.no_register] [--miner.mock_subtensor] [--wandb.on] [--wandb.project_name WANDB.PROJECT_NAME] [--wandb.entity WANDB.ENTITY] + [--logging.debug] 
[--logging.trace] [--logging.record_log] [--logging.logging_dir LOGGING.LOGGING_DIR] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--config CONFIG] + [--strict] [--no_version_checking] [--no_prompt] -optional arguments: - -h, --help show this help message and exit - --openai.suffix OPENAI.SUFFIX - The suffix that comes after a completion of inserted text. - --openai.max_tokens OPENAI.MAX_TOKENS - The maximum number of tokens to generate in the completion. - --openai.temperature OPENAI.TEMPERATURE - Sampling temperature to use, between 0 and 2. - --openai.top_p OPENAI.TOP_P - Nucleus sampling parameter, top_p probability mass. - --openai.n OPENAI.N How many completions to generate for each prompt. - --openai.presence_penalty OPENAI.PRESENCE_PENALTY - Penalty for tokens based on their presence in the text so far. - --openai.frequency_penalty OPENAI.FREQUENCY_PENALTY - Penalty for tokens based on their frequency in the text so far. - --openai.model_name OPENAI.MODEL_NAME - OpenAI model to use for completion. - --netuid NETUID Subnet netuid - --miner.name NEURON.NAME +options: + -h, --help show this help message and exit + --axon.port AXON.PORT + Port to run the axon on. + --subtensor.network SUBTENSOR.NETWORK + Bittensor network to connect to. + --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT + Chain endpoint to connect to. + --netuid NETUID The chain subnet uid. + --miner.root MINER.ROOT Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --miner.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH + --miner.name MINER.NAME + Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name + --miner.blocks_per_epoch MINER.BLOCKS_PER_EPOCH Blocks until the miner sets weights on chain - --miner.no_set_weights - If True, the model does not set weights. - --miner.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --miner.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --miner.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys + --miner.blacklist.blacklist [MINER.BLACKLIST.BLACKLIST ...] + Blacklist certain hotkeys + --miner.blacklist.whitelist [MINER.BLACKLIST.WHITELIST ...] + Whitelist certain hotkeys + --miner.blacklist.force_validator_permit + Only allow requests from validators --miner.blacklist.allow_non_registered If True, the miner will allow non-registered hotkeys to mine. - --miner.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --miner.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. 
[::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. + --miner.blacklist.minimum_stake_requirement MINER.BLACKLIST.MINIMUM_STAKE_REQUIREMENT + Minimum stake requirement + --miner.blacklist.prompt_cache_block_span MINER.BLACKLIST.PROMPT_CACHE_BLOCK_SPAN + Amount of blocks to keep a prompt in cache + --miner.blacklist.min_request_period MINER.BLACKLIST.MIN_REQUEST_PERIOD + Time period (in minute) to serve a maximum of 50 requests for each hotkey + --miner.priority.default MINER.PRIORITY.DEFAULT + Default priority of non-registered requests + --miner.priority.use_s MINER.PRIORITY.USE_S + A multiplier + --miner.priority.time_stake_multiplicate MINER.PRIORITY.TIME_STAKE_MULTIPLICATE + Time (in minute) it takes to make the stake twice more important in the priority queue + --miner.priority.len_request_timestamps MINER.PRIORITY.LEN_REQUEST_TIMESTAMPS + Number of historic request timestamps to record + --miner.no_set_weights + If True, the miner does not set weights. + --miner.no_serve If True, the miner doesnt serve the axon. + --miner.no_start_axon + If True, the miner doesnt start the axon. + --miner.no_register If True, the miner doesnt register its wallet. 
+ --miner.mock_subtensor + If True, the miner will allow non-registered hotkeys to mine. + --wandb.on Turn on wandb. + --wandb.project_name WANDB.PROJECT_NAME + The name of the project where youre sending the new run. + --wandb.entity WANDB.ENTITY + An entity is a username or team name where youre sending runs. --logging.debug Turn on bittensor debugging information --logging.trace Turn on bittensor trace level information --logging.record_log Turns on logging to file. --logging.logging_dir LOGGING.LOGGING_DIR Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. + --wallet.name WALLET.NAME + The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) + --wallet.hotkey WALLET.HOTKEY + The name of the wallet's hotkey. + --wallet.path WALLET.PATH + The path to your bittensor wallets --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. + --strict If flagged, config will check that only exact arguments have been set. + --no_version_checking + Set true to stop cli version checking. + --no_prompt Set true to stop cli from prompting the user. ``` \ No newline at end of file diff --git a/neurons/openai/miner.py b/neurons/openai/miner.py index 064741f..4b33cc1 100644 --- a/neurons/openai/miner.py +++ b/neurons/openai/miner.py @@ -28,6 +28,22 @@ class OpenAIMiner(Miner): @classmethod def add_args(cls, parser: argparse.ArgumentParser): + """ + Adds OpenAI-specific arguments to the command line parser. + + This class method introduces command-line arguments that pertain specifically to the + OpenAI GPT model's completion settings, such as temperature, max tokens, and model name. + Developers extending or utilizing this method can easily customize the miner's operation + by providing these arguments when starting the miner. + + Args: + parser (argparse.ArgumentParser): + The command line argument parser to which the OpenAI-specific arguments should be added. + + Note: + Consider adding or adjusting arguments here if introducing new features or parameters + related to OpenAI's model completion. + """ parser.add_argument( "--openai.suffix", type=str, @@ -78,6 +94,23 @@ def add_args(cls, parser: argparse.ArgumentParser): ) def config(self) -> "bittensor.Config": + """ + Provides the configuration for the OpenAIMiner. + + This method returns a configuration object specific to the OpenAIMiner, containing settings + and parameters related to the OpenAI model and its interaction parameters. The configuration + ensures the miner's optimal operation with the OpenAI model and can be customized by adjusting + the command-line arguments introduced in the `add_args` method. + + Returns: + bittensor.Config: + A configuration object specific to the OpenAIMiner, detailing the OpenAI model settings + and operational parameters. + + Note: + If introducing new settings or parameters for OpenAI or the miner's operation, ensure they + are properly initialized and returned in this configuration method. + """ parser = argparse.ArgumentParser(description="OpenAI Miner Configs") self.add_args(parser) return bittensor.config(parser) @@ -93,6 +126,34 @@ def __init__(self, api_key: Optional[str] = None, *args, **kwargs): openai.api_key = api_key def prompt(self, synapse: Prompting) -> Prompting: + """ + Overrides the Miner's abstract `prompt` method to process incoming requests using OpenAI. 
+ + This method makes use of the OpenAI GPT model to generate completions for the incoming requests. + When implementing or extending this method, developers should ensure that the `synapse` object + contains both `roles` and `messages` fields. The `roles` field describes the type of each message + (e.g., system, user), while the `messages` field contains the actual content of each message. + + Args: + synapse (Prompting): + The incoming request object. Must contain: + - `roles`: List of roles for each message, e.g., ["system", "user"]. + Describes the origin or type of each message. + - `messages`: List of actual message content corresponding to each role. + The combination of roles and messages forms a conversation context for the model. + + Returns: + Prompting: + The response object containing the model's generated completion. This is essentially + the filled synapse request object with an added `completion` field which contains the + model's response. + + Note: + Developers extending this method should ensure proper handling of both `roles` and `messages` + from the `synapse` object to maintain the conversation context. Additionally, consider adjusting + OpenAI-specific parameters (e.g., temperature, max_tokens) in the config to tailor the response + generation process. + """ messages = [ {"role": role, "content": message} for role, message in zip(synapse.roles, synapse.messages) @@ -114,6 +175,20 @@ def prompt(self, synapse: Prompting) -> Prompting: if __name__ == "__main__": + """ + Main execution point for the OpenAIMiner. + + This script initializes and runs the OpenAIMiner, which connects to the Bittensor network + and uses the OpenAI model for processing incoming requests. The miner continuously listens + for these requests, generating responses using the OpenAI GPT model's completion capabilities. + + Before running, ensure that the `OPENAI_API_KEY` environment variable is set with a valid + OpenAI API key to authorize the model's completions. + + Note: + When executing the script, the miner runs indefinitely, periodically logging its status. + To stop the miner, use a keyboard interrupt or ensure proper termination of the script. + """ openai_api_key = os.getenv("OPENAI_API_KEY") with OpenAIMiner(api_key=openai_api_key):