diff --git a/.pylintrc b/.pylintrc index cf377c33fbb5..4aabfeed2b6e 100644 --- a/.pylintrc +++ b/.pylintrc @@ -46,7 +46,7 @@ fail-under=10 #from-stdin= # Files or directories to be skipped. They should be base names, not paths. -ignore=CVS,protobufs +ignore=CVS,protobufs,train,llamacpp # Add files or directories matching the regular expressions patterns to the # ignore-list. The regex matches against paths and can be in Posix or Windows diff --git a/cli/generator/generate_data.py b/cli/generator/generate_data.py index 5a28f55a65e0..eda0e54f32cb 100755 --- a/cli/generator/generate_data.py +++ b/cli/generator/generate_data.py @@ -64,13 +64,14 @@ def check_prompt_file(prompt_file_path): def encode_prompt(prompt_instructions, prompt): """Encode multiple prompt instructions into a single string.""" idx = 0 - prompt = prompt.format(taxonomy=prompt_instructions[0]['taxonomy_path']) + prompt = prompt.format(taxonomy=prompt_instructions[0]["taxonomy_path"]) + # pylint: disable=unused-variable for idx, task_dict in enumerate(prompt_instructions): - (instruction, prompt_input, prompt_output, taxonomy_path) = ( + (instruction, prompt_input, prompt_output, taxonomy_path,) = ( task_dict["instruction"], task_dict["input"], task_dict["output"], - task_dict['taxonomy_path'] + task_dict["taxonomy_path"], ) instruction = re.sub(r"\s+", " ", instruction).strip().rstrip(":") prompt_input = "" if prompt_input.lower() == "" else prompt_input @@ -152,13 +153,6 @@ def find_word_in_string(w, s): return re.compile(r"\b({0})\b".format(w), flags=re.IGNORECASE).search(s) -def get_seed_examples(contents): - if "seed_examples" in contents: - return contents["seed_examples"] - else: - return contents - - def get_seed_examples(contents): if "seed_examples" in contents: return contents["seed_examples"] @@ -245,9 +239,14 @@ def generate_data( ) warnings += 1 continue - tax_path = '->'.join(file_path.split(os.sep)[1:-1]) + tax_path = "->".join(file_path.split(os.sep)[1:-1]) seed_instruction_data.append( - {"instruction": q, "input": "", "output": a, 'taxonomy_path': tax_path} + { + "instruction": q, + "input": "", + "output": a, + "taxonomy_path": tax_path, + } ) except Exception as e: errors += 1 diff --git a/cli/lab.py b/cli/lab.py index 90f1ab15e651..6cbe9e6ea29f 100755 --- a/cli/lab.py +++ b/cli/lab.py @@ -295,6 +295,7 @@ def generate( @cli.command() @click.pass_context +# pylint: disable=function-redefined def test(ctx): """Perform rudimentary tests of the model""" click.echo("# test TBD") @@ -399,10 +400,9 @@ def is_macos_with_m_chip(): try: # Running 'sysctl -a' and searching for a specific line that indicates ARM architecture result = subprocess.check_output(["sysctl", "-a"], text=True) - if "machdep.cpu.brand_string: Apple" in result: - return True - else: - return False + is_m_chip = "machdep.cpu.brand_string: Apple" in result + return is_m_chip + # pylint: disable=broad-exception-caught except Exception as e: print(f"Error checking architecture: {e}") return False @@ -446,7 +446,7 @@ def train( """ if not is_macos_with_m_chip(): click.secho( - f"`lab train` is only implemented for macOS with M-series chips", + "`lab train` is only implemented for macOS with M-series chips", fg="red", ) sys.exit() @@ -462,6 +462,7 @@ def train( train_files = glob(taxonomy_path + "/train_*") test_files = glob(taxonomy_path + "/test_*") if len(train_files) > 1 or len(test_files) > 1: + # pylint: disable=f-string-without-interpolation click.secho( f"Found multiple files from `lab generate`. Using the first one.", fg="yellow", @@ -533,13 +534,14 @@ def train( default="ibm-merlinite-7b-mlx-q", ) @click.option("--adapter-file", help="LoRA adapter to use for test.", default=None) +# pylint: disable=function-redefined def test(data_dir, model_dir, adapter_file): """ TODO """ if not is_macos_with_m_chip(): click.secho( - f"`lab train` is only implemented for macOS with M-series chips", + "`lab train` is only implemented for macOS with M-series chips", fg="red", ) sys.exit() @@ -551,7 +553,7 @@ def test(data_dir, model_dir, adapter_file): # Load the JSON Lines file test_data_dir = f"{data_dir}/test.jsonl" - with open(test_data_dir, "r") as f: + with open(test_data_dir, "r", encoding="utf-8") as f: test_data = [json.loads(line) for line in f] SYS_PROMPT = "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."