Skip to content

Commit

Permalink
support converting TurboSparse mistral model which embeds MLP in Pytorch tensors
Browse files Browse the repository at this point in the history
  • Loading branch information
hodlen committed Jul 9, 2024
1 parent 61cac9b commit 5b02459
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
9 changes: 5 additions & 4 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1205,7 +1205,7 @@ def main(args_in: list[str] | None = None) -> None:
parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin, *.safetensors)")
parser.add_argument("sparse_predictor", type=Path, help="predictors for sparse FFN inference")
parser.add_argument("sparse_predictor", type=Path, help="predictors for sparse FFN inference", nargs='?')

args = parser.parse_args(args_in)

Expand All @@ -1230,9 +1230,10 @@ def main(args_in: list[str] | None = None) -> None:
if not args.vocab_only:
model_plus = load_some_model(args.model)
params = Params.load(model_plus)
mlp_predictor_plus = load_predictor_model(args.sparse_predictor)
params.predictor_params = PredictorParams.load(mlp_predictor_plus)
model_plus = merge_multifile_models([model_plus, mlp_predictor_plus])
if args.sparse_predictor:
mlp_predictor_plus = load_predictor_model(args.sparse_predictor)
params.predictor_params = PredictorParams.load(mlp_predictor_plus)
model_plus = merge_multifile_models([model_plus, mlp_predictor_plus])
else:
model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
params = Params.load(model_plus)
Expand Down
2 changes: 2 additions & 0 deletions gguf-py/gguf/tensor_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,12 @@ class TensorNameMap:

MODEL_TENSOR.FC_1: (
"model.layers.{bid}.fc1",
"model.layers.{bid}.mlp.predictor.fc1",
),

MODEL_TENSOR.FC_2: (
"model.layers.{bid}.fc2",
"model.layers.{bid}.mlp.predictor.fc2",
),
}

Expand Down

0 comments on commit 5b02459

Please sign in to comment.