
Commit e69cff7: bug fix

bmosaicml committed Jun 28, 2023
Parent: 7e86f88
Showing 3 changed files with 11 additions and 12 deletions.
mcli/mcli-hf-eval.yaml: 4 additions & 4 deletions

@@ -1,7 +1,7 @@
 integrations:
 - integration_type: git_repo
   git_repo: mosaicml/llm-foundry
-  # git_branch: # use your branch
+  # git_branch: # use your branch
   # git_commit: # OR use your commit hash
   pip_install: -e ".[gpu]"
   ssh_clone: false # Should be true if using a private repo
@@ -13,8 +13,8 @@ command: |
 # Mosaic Cloud will use run_name (with a unique suffix) to populate the env var $RUN_NAME
 run_name: all-eval
 gpu_num: 8
-gpu_type: a100_40gb
-cluster: r7z2 # replace with your cluster here!
+# gpu_type:
+# cluster: # replace with your cluster here!
 
 image: mosaicml/llm-foundry:2.0.1_cu118-latest
@@ -115,7 +115,7 @@ parameters:
     model_name: mosaicml/mpt-7b-chat
     # Tokenizer
     tokenizer:
-      name: EleutherAI/gpt-neox-20b
+      name: mosaicml/mpt-7b-chat
       kwargs:
         model_max_length: ${max_seq_len}
 
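Note: the tokenizer change is the substantive fix in this file: evaluation should tokenize with the tokenizer the model was trained with, so the name now matches model_name. A minimal sketch of what the corrected block resolves to, assuming build_tokenizer forwards name and kwargs to Hugging Face's AutoTokenizer; the max_seq_len value below is a stand-in for the interpolated ${max_seq_len}:

from transformers import AutoTokenizer

max_seq_len = 2048  # stand-in for the value interpolated from ${max_seq_len}

# Assumes build_tokenizer passes `name` and `kwargs` straight through;
# mosaicml/mpt-7b-chat publishes a tokenizer config, so this resolves directly.
tokenizer = AutoTokenizer.from_pretrained('mosaicml/mpt-7b-chat',
                                          model_max_length=max_seq_len)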
scripts/eval/eval.py: 6 additions & 2 deletions

@@ -1,6 +1,7 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
+import os
 import re
 import sys
 import time
@@ -36,7 +37,7 @@ def load_model(model_cfg, tokenizer, num_retries):
     )
 
 
-def evaluate_model(model_cfg):
+def evaluate_model(model_cfg, run_name):
     print(f'Evaluating model: {model_cfg.model_name}', flush=True)
     # Build tokenizer and model
     tokenizer = build_tokenizer(model_cfg.tokenizer)
@@ -81,6 +82,7 @@ def evaluate_model(model_cfg):
     load_path = model_cfg.get('load_path', None)
 
     trainer = Trainer(
+        run_name=run_name,
         model=composer_model,
         loggers=loggers,
         precision=cfg.precision,
@@ -106,6 +108,8 @@
 
 def main(cfg):
     cfg.dist_timeout = cfg.get('dist_timeout', 600.0)
+    if cfg.get('run_name') is None:
+        cfg.run_name = os.environ.get('RUN_NAME', 'llm')
 
     reproducibility.seed_all(cfg.seed)
     dist.initialize_dist(get_device(None), timeout=cfg.dist_timeout)
@@ -116,7 +120,7 @@ def main(cfg):
 
     try:
         (in_memory_logger, logger_keys, model_gauntlet_callback,
-         model_gauntlet) = evaluate_model(model_cfg)
+         model_gauntlet) = evaluate_model(model_cfg, cfg.run_name)
 
         composite_scores = model_gauntlet_callback.eval_end(
             None, in_memory_logger)
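The eval.py changes thread a run_name through to the Trainer, so the eval run is tied back to the Mosaic Cloud run that launched it. A minimal sketch of the fallback logic added to main, assuming an omegaconf DictConfig like the one eval.py passes around ('llm' is the literal default from the commit):

import os

from omegaconf import OmegaConf

cfg = OmegaConf.create({'seed': 17})  # hypothetical config with no run_name set

# On Mosaic Cloud, $RUN_NAME is populated from the yaml's run_name field
# (plus a unique suffix); outside the platform it may be unset.
if cfg.get('run_name') is None:
    cfg.run_name = os.environ.get('RUN_NAME', 'llm')

print(cfg.run_name)  # the env value if present, else 'llm'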
scripts/eval/yamls/model_gauntlet.yaml: 1 addition & 6 deletions

@@ -2,7 +2,7 @@ model_gauntlet:
   weighting: EQUAL
   subtract_random_baseline: true
   rescale_accuracy: true
-  tasks:
+  categories:
   - name: world_knowledge
     benchmarks:
     - name: jeopardy
@@ -112,8 +112,3 @@ model_gauntlet:
     - name: boolq
       num_fewshot: 10
       random_baseline: 0.5
-  - name: programming
-    benchmarks:
-    - name: humaneval
-      num_fewshot: 0
-      random_baseline: 0.0
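The rename from tasks: to categories: means older gauntlet configs that still use the tasks: key will no longer be picked up. A minimal, illustrative sketch of the kind of check a caller could run, assuming the category list is read from this yaml with omegaconf (the load path and field access mirror the file above but are assumptions, not the repo's actual loader):

from omegaconf import OmegaConf

cfg = OmegaConf.load('scripts/eval/yamls/model_gauntlet.yaml')

# After this commit the list lives under `categories`; a config still using
# the old `tasks:` key would make this lookup return None.
categories = cfg.model_gauntlet.get('categories')
assert categories is not None, 'rename `tasks:` to `categories:` in your gauntlet yaml'

for category in categories:
    print(category.name, [b.name for b in category.benchmarks])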
