Skip to content

Commit

Permalink
Fix formatting and linting (NVIDIA#316)
Browse files: browse the repository at this point in the history.
* Fix formatting and linting

Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>

* Update after review

Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>

* Update after review

Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>

---------

Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>
  • Loading branch information
hickeyma authored Mar 5, 2024
1 parent d1d992e commit ddac6ba
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fail-under=10
#from-stdin=

# Files or directories to be skipped. They should be base names, not paths.
ignore=CVS,protobufs
ignore=CVS,protobufs,train,llamacpp

# Add files or directories matching the regular expressions patterns to the
# ignore-list. The regex matches against paths and can be in Posix or Windows
Expand Down
23 changes: 11 additions & 12 deletions cli/generator/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,14 @@ def check_prompt_file(prompt_file_path):
def encode_prompt(prompt_instructions, prompt):
"""Encode multiple prompt instructions into a single string."""
idx = 0
prompt = prompt.format(taxonomy=prompt_instructions[0]['taxonomy_path'])
prompt = prompt.format(taxonomy=prompt_instructions[0]["taxonomy_path"])
# pylint: disable=unused-variable
for idx, task_dict in enumerate(prompt_instructions):
(instruction, prompt_input, prompt_output, taxonomy_path) = (
(instruction, prompt_input, prompt_output, taxonomy_path,) = (
task_dict["instruction"],
task_dict["input"],
task_dict["output"],
task_dict['taxonomy_path']
task_dict["taxonomy_path"],
)
instruction = re.sub(r"\s+", " ", instruction).strip().rstrip(":")
prompt_input = "<noinput>" if prompt_input.lower() == "" else prompt_input
Expand Down Expand Up @@ -152,13 +153,6 @@ def find_word_in_string(w, s):
return re.compile(r"\b({0})\b".format(w), flags=re.IGNORECASE).search(s)


def get_seed_examples(contents):
if "seed_examples" in contents:
return contents["seed_examples"]
else:
return contents


def get_seed_examples(contents):
if "seed_examples" in contents:
return contents["seed_examples"]
Expand Down Expand Up @@ -245,9 +239,14 @@ def generate_data(
)
warnings += 1
continue
tax_path = '->'.join(file_path.split(os.sep)[1:-1])
tax_path = "->".join(file_path.split(os.sep)[1:-1])
seed_instruction_data.append(
{"instruction": q, "input": "", "output": a, 'taxonomy_path': tax_path}
{
"instruction": q,
"input": "",
"output": a,
"taxonomy_path": tax_path,
}
)
except Exception as e:
errors += 1
Expand Down
16 changes: 9 additions & 7 deletions cli/lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ def generate(

@cli.command()
@click.pass_context
# pylint: disable=function-redefined
def test(ctx):
"""Perform rudimentary tests of the model"""
click.echo("# test TBD")
Expand Down Expand Up @@ -399,10 +400,9 @@ def is_macos_with_m_chip():
try:
# Running 'sysctl -a' and searching for a specific line that indicates ARM architecture
result = subprocess.check_output(["sysctl", "-a"], text=True)
if "machdep.cpu.brand_string: Apple" in result:
return True
else:
return False
is_m_chip = "machdep.cpu.brand_string: Apple" in result
return is_m_chip
# pylint: disable=broad-exception-caught
except Exception as e:
print(f"Error checking architecture: {e}")
return False
Expand Down Expand Up @@ -446,7 +446,7 @@ def train(
"""
if not is_macos_with_m_chip():
click.secho(
f"`lab train` is only implemented for macOS with M-series chips",
"`lab train` is only implemented for macOS with M-series chips",
fg="red",
)
sys.exit()
Expand All @@ -462,6 +462,7 @@ def train(
train_files = glob(taxonomy_path + "/train_*")
test_files = glob(taxonomy_path + "/test_*")
if len(train_files) > 1 or len(test_files) > 1:
# pylint: disable=f-string-without-interpolation
click.secho(
f"Found multiple files from `lab generate`. Using the first one.",
fg="yellow",
Expand Down Expand Up @@ -533,13 +534,14 @@ def train(
default="ibm-merlinite-7b-mlx-q",
)
@click.option("--adapter-file", help="LoRA adapter to use for test.", default=None)
# pylint: disable=function-redefined
def test(data_dir, model_dir, adapter_file):
"""
TODO
"""
if not is_macos_with_m_chip():
click.secho(
f"`lab train` is only implemented for macOS with M-series chips",
"`lab train` is only implemented for macOS with M-series chips",
fg="red",
)
sys.exit()
Expand All @@ -551,7 +553,7 @@ def test(data_dir, model_dir, adapter_file):

# Load the JSON Lines file
test_data_dir = f"{data_dir}/test.jsonl"
with open(test_data_dir, "r") as f:
with open(test_data_dir, "r", encoding="utf-8") as f:
test_data = [json.loads(line) for line in f]

SYS_PROMPT = "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
Expand Down

0 comments on commit ddac6ba

Please sign in to comment.