
Support Llama 3 conversion #6745

Merged Apr 21, 2024 (8 commits)
Conversation

@pcuenca
Contributor

pcuenca commented Apr 18, 2024

The tokenizer is BPE.
@osanseviero

What a 🐐

@Josh-XT

Josh-XT commented Apr 18, 2024

What a champion lol. PR opened within 30 minutes of the model release.

@m18coppola
Contributor

Doesn't seem that the eos_token is working with either of the convert scripts in this PR

@USBhost

USBhost commented Apr 18, 2024

I can't convert 70b on this

EDIT: run with "--vocab-type bpe"

@mchiang0610

mchiang0610 commented Apr 18, 2024

This is what we did to get the model out -- it doesn't seem like the special tokens are added properly.

We are looking deeper for further improvements / fixes.

{
  "added_tokens_decoder": {
    "128000": {
      "content": "<|begin_of_text|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128001": {
      "content": "<|end_of_text|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128002": {
      "content": "<|reserved_special_token_0|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128003": {
      "content": "<|reserved_special_token_1|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128004": {
      "content": "<|reserved_special_token_2|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128005": {
      "content": "<|reserved_special_token_3|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128006": {
      "content": "<|start_header_id|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "128007": {
      "content": "<|end_header_id|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "128008": {
      "content": "<|reserved_special_token_4|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128009": {
      "content": "<|eot_id|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "128010": {
      "content": "<|reserved_special_token_5|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128011": {
      "content": "<|reserved_special_token_6|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128012": {
      "content": "<|reserved_special_token_7|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128013": {
      "content": "<|reserved_special_token_8|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128014": {
      "content": "<|reserved_special_token_9|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128015": {
      "content": "<|reserved_special_token_10|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128016": {
      "content": "<|reserved_special_token_11|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128017": {
      "content": "<|reserved_special_token_12|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128018": {
      "content": "<|reserved_special_token_13|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128019": {
      "content": "<|reserved_special_token_14|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128020": {
      "content": "<|reserved_special_token_15|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128021": {
      "content": "<|reserved_special_token_16|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128022": {
      "content": "<|reserved_special_token_17|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128023": {
      "content": "<|reserved_special_token_18|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128024": {
      "content": "<|reserved_special_token_19|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128025": {
      "content": "<|reserved_special_token_20|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128026": {
      "content": "<|reserved_special_token_21|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128027": {
      "content": "<|reserved_special_token_22|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128028": {
      "content": "<|reserved_special_token_23|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128029": {
      "content": "<|reserved_special_token_24|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128030": {
      "content": "<|reserved_special_token_25|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128031": {
      "content": "<|reserved_special_token_26|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128032": {
      "content": "<|reserved_special_token_27|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128033": {
      "content": "<|reserved_special_token_28|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128034": {
      "content": "<|reserved_special_token_29|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128035": {
      "content": "<|reserved_special_token_30|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128036": {
      "content": "<|reserved_special_token_31|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128037": {
      "content": "<|reserved_special_token_32|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128038": {
      "content": "<|reserved_special_token_33|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128039": {
      "content": "<|reserved_special_token_34|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128040": {
      "content": "<|reserved_special_token_35|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128041": {
      "content": "<|reserved_special_token_36|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128042": {
      "content": "<|reserved_special_token_37|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128043": {
      "content": "<|reserved_special_token_38|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128044": {
      "content": "<|reserved_special_token_39|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128045": {
      "content": "<|reserved_special_token_40|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128046": {
      "content": "<|reserved_special_token_41|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128047": {
      "content": "<|reserved_special_token_42|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128048": {
      "content": "<|reserved_special_token_43|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128049": {
      "content": "<|reserved_special_token_44|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128050": {
      "content": "<|reserved_special_token_45|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128051": {
      "content": "<|reserved_special_token_46|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128052": {
      "content": "<|reserved_special_token_47|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128053": {
      "content": "<|reserved_special_token_48|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128054": {
      "content": "<|reserved_special_token_49|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128055": {
      "content": "<|reserved_special_token_50|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128056": {
      "content": "<|reserved_special_token_51|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128057": {
      "content": "<|reserved_special_token_52|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128058": {
      "content": "<|reserved_special_token_53|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128059": {
      "content": "<|reserved_special_token_54|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128060": {
      "content": "<|reserved_special_token_55|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128061": {
      "content": "<|reserved_special_token_56|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128062": {
      "content": "<|reserved_special_token_57|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128063": {
      "content": "<|reserved_special_token_58|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128064": {
      "content": "<|reserved_special_token_59|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128065": {
      "content": "<|reserved_special_token_60|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128066": {
      "content": "<|reserved_special_token_61|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128067": {
      "content": "<|reserved_special_token_62|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128068": {
      "content": "<|reserved_special_token_63|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128069": {
      "content": "<|reserved_special_token_64|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128070": {
      "content": "<|reserved_special_token_65|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128071": {
      "content": "<|reserved_special_token_66|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128072": {
      "content": "<|reserved_special_token_67|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128073": {
      "content": "<|reserved_special_token_68|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128074": {
      "content": "<|reserved_special_token_69|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128075": {
      "content": "<|reserved_special_token_70|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128076": {
      "content": "<|reserved_special_token_71|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128077": {
      "content": "<|reserved_special_token_72|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128078": {
      "content": "<|reserved_special_token_73|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128079": {
      "content": "<|reserved_special_token_74|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128080": {
      "content": "<|reserved_special_token_75|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128081": {
      "content": "<|reserved_special_token_76|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128082": {
      "content": "<|reserved_special_token_77|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128083": {
      "content": "<|reserved_special_token_78|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128084": {
      "content": "<|reserved_special_token_79|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128085": {
      "content": "<|reserved_special_token_80|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128086": {
      "content": "<|reserved_special_token_81|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128087": {
      "content": "<|reserved_special_token_82|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128088": {
      "content": "<|reserved_special_token_83|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128089": {
      "content": "<|reserved_special_token_84|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128090": {
      "content": "<|reserved_special_token_85|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128091": {
      "content": "<|reserved_special_token_86|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128092": {
      "content": "<|reserved_special_token_87|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128093": {
      "content": "<|reserved_special_token_88|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128094": {
      "content": "<|reserved_special_token_89|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128095": {
      "content": "<|reserved_special_token_90|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128096": {
      "content": "<|reserved_special_token_91|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128097": {
      "content": "<|reserved_special_token_92|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128098": {
      "content": "<|reserved_special_token_93|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128099": {
      "content": "<|reserved_special_token_94|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128100": {
      "content": "<|reserved_special_token_95|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128101": {
      "content": "<|reserved_special_token_96|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128102": {
      "content": "<|reserved_special_token_97|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128103": {
      "content": "<|reserved_special_token_98|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128104": {
      "content": "<|reserved_special_token_99|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128105": {
      "content": "<|reserved_special_token_100|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128106": {
      "content": "<|reserved_special_token_101|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128107": {
      "content": "<|reserved_special_token_102|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128108": {
      "content": "<|reserved_special_token_103|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128109": {
      "content": "<|reserved_special_token_104|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128110": {
      "content": "<|reserved_special_token_105|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128111": {
      "content": "<|reserved_special_token_106|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128112": {
      "content": "<|reserved_special_token_107|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128113": {
      "content": "<|reserved_special_token_108|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128114": {
      "content": "<|reserved_special_token_109|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128115": {
      "content": "<|reserved_special_token_110|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128116": {
      "content": "<|reserved_special_token_111|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128117": {
      "content": "<|reserved_special_token_112|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128118": {
      "content": "<|reserved_special_token_113|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128119": {
      "content": "<|reserved_special_token_114|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128120": {
      "content": "<|reserved_special_token_115|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128121": {
      "content": "<|reserved_special_token_116|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128122": {
      "content": "<|reserved_special_token_117|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128123": {
      "content": "<|reserved_special_token_118|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128124": {
      "content": "<|reserved_special_token_119|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128125": {
      "content": "<|reserved_special_token_120|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128126": {
      "content": "<|reserved_special_token_121|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128127": {
      "content": "<|reserved_special_token_122|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128128": {
      "content": "<|reserved_special_token_123|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128129": {
      "content": "<|reserved_special_token_124|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128130": {
      "content": "<|reserved_special_token_125|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128131": {
      "content": "<|reserved_special_token_126|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128132": {
      "content": "<|reserved_special_token_127|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128133": {
      "content": "<|reserved_special_token_128|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128134": {
      "content": "<|reserved_special_token_129|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128135": {
      "content": "<|reserved_special_token_130|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128136": {
      "content": "<|reserved_special_token_131|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128137": {
      "content": "<|reserved_special_token_132|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128138": {
      "content": "<|reserved_special_token_133|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128139": {
      "content": "<|reserved_special_token_134|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128140": {
      "content": "<|reserved_special_token_135|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128141": {
      "content": "<|reserved_special_token_136|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128142": {
      "content": "<|reserved_special_token_137|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128143": {
      "content": "<|reserved_special_token_138|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128144": {
      "content": "<|reserved_special_token_139|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128145": {
      "content": "<|reserved_special_token_140|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128146": {
      "content": "<|reserved_special_token_141|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128147": {
      "content": "<|reserved_special_token_142|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128148": {
      "content": "<|reserved_special_token_143|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128149": {
      "content": "<|reserved_special_token_144|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128150": {
      "content": "<|reserved_special_token_145|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128151": {
      "content": "<|reserved_special_token_146|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128152": {
      "content": "<|reserved_special_token_147|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128153": {
      "content": "<|reserved_special_token_148|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128154": {
      "content": "<|reserved_special_token_149|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128155": {
      "content": "<|reserved_special_token_150|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128156": {
      "content": "<|reserved_special_token_151|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128157": {
      "content": "<|reserved_special_token_152|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128158": {
      "content": "<|reserved_special_token_153|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128159": {
      "content": "<|reserved_special_token_154|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128160": {
      "content": "<|reserved_special_token_155|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128161": {
      "content": "<|reserved_special_token_156|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128162": {
      "content": "<|reserved_special_token_157|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128163": {
      "content": "<|reserved_special_token_158|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128164": {
      "content": "<|reserved_special_token_159|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128165": {
      "content": "<|reserved_special_token_160|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128166": {
      "content": "<|reserved_special_token_161|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128167": {
      "content": "<|reserved_special_token_162|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128168": {
      "content": "<|reserved_special_token_163|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128169": {
      "content": "<|reserved_special_token_164|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128170": {
      "content": "<|reserved_special_token_165|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128171": {
      "content": "<|reserved_special_token_166|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128172": {
      "content": "<|reserved_special_token_167|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128173": {
      "content": "<|reserved_special_token_168|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128174": {
      "content": "<|reserved_special_token_169|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128175": {
      "content": "<|reserved_special_token_170|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128176": {
      "content": "<|reserved_special_token_171|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128177": {
      "content": "<|reserved_special_token_172|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128178": {
      "content": "<|reserved_special_token_173|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128179": {
      "content": "<|reserved_special_token_174|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128180": {
      "content": "<|reserved_special_token_175|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128181": {
      "content": "<|reserved_special_token_176|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128182": {
      "content": "<|reserved_special_token_177|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128183": {
      "content": "<|reserved_special_token_178|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128184": {
      "content": "<|reserved_special_token_179|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128185": {
      "content": "<|reserved_special_token_180|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128186": {
      "content": "<|reserved_special_token_181|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128187": {
      "content": "<|reserved_special_token_182|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128188": {
      "content": "<|reserved_special_token_183|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128189": {
      "content": "<|reserved_special_token_184|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128190": {
      "content": "<|reserved_special_token_185|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128191": {
      "content": "<|reserved_special_token_186|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128192": {
      "content": "<|reserved_special_token_187|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128193": {
      "content": "<|reserved_special_token_188|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128194": {
      "content": "<|reserved_special_token_189|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128195": {
      "content": "<|reserved_special_token_190|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128196": {
      "content": "<|reserved_special_token_191|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128197": {
      "content": "<|reserved_special_token_192|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128198": {
      "content": "<|reserved_special_token_193|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128199": {
      "content": "<|reserved_special_token_194|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128200": {
      "content": "<|reserved_special_token_195|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128201": {
      "content": "<|reserved_special_token_196|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128202": {
      "content": "<|reserved_special_token_197|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128203": {
      "content": "<|reserved_special_token_198|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128204": {
      "content": "<|reserved_special_token_199|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128205": {
      "content": "<|reserved_special_token_200|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128206": {
      "content": "<|reserved_special_token_201|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128207": {
      "content": "<|reserved_special_token_202|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128208": {
      "content": "<|reserved_special_token_203|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128209": {
      "content": "<|reserved_special_token_204|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128210": {
      "content": "<|reserved_special_token_205|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128211": {
      "content": "<|reserved_special_token_206|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128212": {
      "content": "<|reserved_special_token_207|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128213": {
      "content": "<|reserved_special_token_208|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128214": {
      "content": "<|reserved_special_token_209|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128215": {
      "content": "<|reserved_special_token_210|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128216": {
      "content": "<|reserved_special_token_211|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128217": {
      "content": "<|reserved_special_token_212|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128218": {
      "content": "<|reserved_special_token_213|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128219": {
      "content": "<|reserved_special_token_214|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128220": {
      "content": "<|reserved_special_token_215|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128221": {
      "content": "<|reserved_special_token_216|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128222": {
      "content": "<|reserved_special_token_217|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128223": {
      "content": "<|reserved_special_token_218|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128224": {
      "content": "<|reserved_special_token_219|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128225": {
      "content": "<|reserved_special_token_220|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128226": {
      "content": "<|reserved_special_token_221|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128227": {
      "content": "<|reserved_special_token_222|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128228": {
      "content": "<|reserved_special_token_223|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128229": {
      "content": "<|reserved_special_token_224|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128230": {
      "content": "<|reserved_special_token_225|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128231": {
      "content": "<|reserved_special_token_226|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128232": {
      "content": "<|reserved_special_token_227|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128233": {
      "content": "<|reserved_special_token_228|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128234": {
      "content": "<|reserved_special_token_229|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128235": {
      "content": "<|reserved_special_token_230|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128236": {
      "content": "<|reserved_special_token_231|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128237": {
      "content": "<|reserved_special_token_232|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128238": {
      "content": "<|reserved_special_token_233|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128239": {
      "content": "<|reserved_special_token_234|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128240": {
      "content": "<|reserved_special_token_235|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128241": {
      "content": "<|reserved_special_token_236|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128242": {
      "content": "<|reserved_special_token_237|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128243": {
      "content": "<|reserved_special_token_238|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128244": {
      "content": "<|reserved_special_token_239|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128245": {
      "content": "<|reserved_special_token_240|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128246": {
      "content": "<|reserved_special_token_241|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128247": {
      "content": "<|reserved_special_token_242|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128248": {
      "content": "<|reserved_special_token_243|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128249": {
      "content": "<|reserved_special_token_244|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128250": {
      "content": "<|reserved_special_token_245|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128251": {
      "content": "<|reserved_special_token_246|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128252": {
      "content": "<|reserved_special_token_247|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128253": {
      "content": "<|reserved_special_token_248|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128254": {
      "content": "<|reserved_special_token_249|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128255": {
      "content": "<|reserved_special_token_250|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|begin_of_text|>",
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|end_of_text|>",
  "model_input_names": [
    "input_ids",
    "attention_mask"
  ],
  "model_max_length": 1000000000000000019884624838656,
  "tokenizer_class": "PreTrainedTokenizerFast"
}
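
For reference, the chat_template in that config renders the header/turn structure discussed below. A minimal sketch with transformers (the repo id is an assumption; any copy of this tokenizer_config.json behaves the same):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")  # assumed repo id
    messages = [{"role": "user", "content": "Hello"}]
    print(tokenizer.apply_chat_template(messages, tokenize=False))
    # expected shape (the template always appends an open assistant header):
    # <|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n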

@pcuenca
Contributor Author

pcuenca commented Apr 18, 2024

I can't convert 70b on this

@USBhost did you try with convert-hf-to-gguf.py?

@jxy
Contributor

jxy commented Apr 18, 2024

The instruct models need the tokenizer.ggml.eos_token_id to be 128009, or <|eot_id|>.

@USBhost

USBhost commented Apr 18, 2024

I can't convert 70b on this

@USBhost did you try with convert-hf-to-gguf.py?

python convert-hf-to-gguf.py /mnt/36TB/AI/Meta-Llama-3-70B/ --outtype f16
Loading model: Meta-Llama-3-70B
gguf: This GGUF file is for Little Endian only
Set model parameters
gguf: context length = 8192
gguf: embedding length = 8192
gguf: feed forward length = 28672
gguf: head count = 64
gguf: key-value head count = 8
gguf: rope theta = 500000.0
gguf: rms norm epsilon = 1e-05
gguf: file type = 1
Set model tokenizer
Traceback (most recent call last):
  File "/home/usbhost/llama.cpp/convert-hf-to-gguf.py", line 1302, in set_vocab
    self._set_vocab_sentencepiece()
  File "/home/usbhost/llama.cpp/convert-hf-to-gguf.py", line 330, in _set_vocab_sentencepiece
    raise FileNotFoundError(f"File not found: {tokenizer_path}")
FileNotFoundError: File not found: /mnt/36TB/AI/Meta-Llama-3-70B/tokenizer.model

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/usbhost/llama.cpp/convert-hf-to-gguf.py", line 2736, in <module>
    main()
  File "/home/usbhost/llama.cpp/convert-hf-to-gguf.py", line 2723, in main
    model_instance.set_vocab()
  File "/home/usbhost/llama.cpp/convert-hf-to-gguf.py", line 1305, in set_vocab
    self._set_vocab_llama_hf()
  File "/home/usbhost/llama.cpp/convert-hf-to-gguf.py", line 377, in _set_vocab_llama_hf
    vocab = LlamaHfVocab(self.dir_model)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/usbhost/llama.cpp/convert.py", line 539, in __init__
    raise FileNotFoundError('Cannot find Llama BPE tokenizer')
FileNotFoundError: Cannot find Llama BPE tokenizer

When I add https://huggingface.co/meta-llama/Meta-Llama-3-70B/blob/main/original/tokenizer.model I get the same error as on convert.py

@pcuenca
Contributor Author

pcuenca commented Apr 18, 2024

Doesn't seem that the eos_token is working with either of the convert scripts in this PR

@m18coppola the instruct models use two different EOS tokens: the standard one (<|end_of_text|>), and a second one that signals the end of the assistant turn (<|eot_id|>). Generation must stop when either one is encountered.

I'm not sure how to replicate this behaviour yet. The best solution would be to use a list of eos/stop tokens, but I don't know how to do it; any suggestions on where to look?

Another idea would be to use <|eot_id|> (the assistant finalization token) as the only EOS when converting an instruct model, and <|end_of_text|> when converting a pre-trained model.
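
To picture the stop-token-list idea, a minimal sketch (IDs taken from the config above; next_token is a hypothetical sampling function, and this is not llama.cpp's actual generation loop):

    # Sketch only: stop generation when either terminator is produced.
    STOP_IDS = {128001, 128009}  # <|end_of_text|>, <|eot_id|>

    def generate(next_token, prompt_ids, max_new_tokens=256):
        ids = list(prompt_ids)
        for _ in range(max_new_tokens):
            tok = next_token(ids)  # hypothetical sampler
            if tok in STOP_IDS:
                break
            ids.append(tok)
        return ids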

@mchiang0610

@pcuenca for the changes:

"special": false on <|start_header_id|> <|end_header_id|> <|eot_id|>

@pcuenca
Contributor Author

pcuenca commented Apr 18, 2024

The instruct models need the tokenizer.ggml.eos_token_id to be 128009, or <|eot_id|>.

@jxy Our comments were sent at the same time :) Yes, that's one of the solutions I mentioned, but I'm not sure it will work consistently; I've seen models that use various terminators depending on context.

We can try it out though, I'll take a look.

@USBhost

USBhost commented Apr 18, 2024

Sorry lads, I had to run with --vocab-type bpe.
So automatic detection is broken.

@arch-btw mentioned this pull request Apr 18, 2024
@ddh0
Contributor

ddh0 commented Apr 18, 2024

The instruct models need the tokenizer.ggml.eos_token_id to be 128009, or <|eot_id|>.

@jxy Our comments were sent at the same time :) Yes, that's one of the solutions I mentioned, but I'm not sure it will work consistently, I've seen models that use various terminators depending on context.

We can try it out though, I'll take a look.

From the model card on HF:

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

Not sure if this is helpful or not 😅 but thought I might as well mention it.
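
For context, the model card passes that list to generate(). Roughly (a sketch, assuming the gated meta-llama repo id; recent transformers accepts a list for eos_token_id):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # assumed repo id
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

    messages = [{"role": "user", "content": "Hello"}]
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

    terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
    outputs = model.generate(input_ids, max_new_tokens=64, eos_token_id=terminators)
    print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))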

@jxy
Contributor

jxy commented Apr 18, 2024

It seems the model generates <|eot_id|> with the official chat template. Otherwise it may generate <|end_of_text|>.

@teleprint-me
Contributor

teleprint-me commented Apr 18, 2024

It's always the tokenizer. The tokenizers are always a mess.

Special tokens apply to the instruct tuned model.

The ChatFormat class in the source code shows how they implemented it.

The encode_header is interesting. That's a new one? Then they have encode_message and encode_dialog_prompt.

They're using tiktoken for the Tokenizer.

Lots of new special tokens.

        special_tokens = [
            "<|begin_of_text|>",
            "<|end_of_text|>",
            "<|reserved_special_token_0|>",
            "<|reserved_special_token_1|>",
            "<|reserved_special_token_2|>",
            "<|reserved_special_token_3|>",
            "<|start_header_id|>",
            "<|end_header_id|>",
            "<|reserved_special_token_4|>",
            "<|eot_id|>",  # end of turn
        ] + [
            f"<|reserved_special_token_{i}|>"
            for i in range(5, self.num_reserved_special_tokens - 5)
        ]

This should be interesting (and not in a fun way either). This is gonna create another level of complexity.
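
For context, a rough sketch of how that list ends up registered with tiktoken, paraphrasing the Tokenizer constructor in Meta's reference code (the regex pattern and path below are placeholders, not the real values). Each special token is assigned an id after the 128000 base ranks, which is why <|begin_of_text|> is 128000 and <|eot_id|> is 128009:

import tiktoken
from tiktoken.load import load_tiktoken_bpe

# Placeholder pattern; Meta's actual pre-tokenization regex is more elaborate.
PAT_STR = r"\S+|\s+"

special_tokens = [
    "<|begin_of_text|>",
    "<|end_of_text|>",
] + [f"<|reserved_special_token_{i}|>" for i in range(4)] + [
    "<|start_header_id|>",
    "<|end_header_id|>",
    "<|reserved_special_token_4|>",
    "<|eot_id|>",
] + [f"<|reserved_special_token_{i}|>" for i in range(5, 251)]  # 256 in total

mergeable_ranks = load_tiktoken_bpe("Meta-Llama-3-8B-Instruct/tokenizer.model")
num_base_tokens = len(mergeable_ranks)  # 128000

special_token_ids = {tok: num_base_tokens + i for i, tok in enumerate(special_tokens)}

enc = tiktoken.Encoding(
    name="llama3",
    pat_str=PAT_STR,
    mergeable_ranks=mergeable_ranks,
    special_tokens=special_token_ids,
)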

@bullno1
Copy link
Contributor

bullno1 commented Apr 18, 2024

Doesn't seem that the eos_token is working with either of the convert scripts in this PR

@m18coppola the instruct models use two different EOS tokens: the standard one (<|end_of_text|>), and a second one that signals the end of the assistant turn (<|eot_id|>). Generation must stop when either one is encountered.

I'm not sure how to replicate this behaviour yet. The best solution would be to use a list of eos/stop tokens, but I don't know how to do it. Any suggestions on where to look?

Another idea would be to use <|eot_id|> (the assistant finalization token) as the only EOS when converting an instruct model, and <|end_of_text|> when converting a pre-trained model.

Instead of remapping, which creates more confusion, just update the generation code to stop on eot_id.
It's like one line of config/code change.

At least from my cursory tests, all special tokens are tokenized properly out of the box.

I did a bit of testing and chat works.

@teleprint-me
Copy link
Contributor

teleprint-me commented Apr 18, 2024

Okay, it's in there.

        # BOS / EOS token IDs
        self.bos_id: int = self.special_tokens["<|begin_of_text|>"]
        self.eos_id: int = self.special_tokens["<|end_of_text|>"]
        self.pad_id: int = -1
        self.stop_tokens = {
            self.special_tokens["<|end_of_text|>"],
            self.special_tokens["<|eot_id|>"],
        }

@pcuenca The list of stop tokens is usually added during inference. Chat templates have recently been embedded into llama.cpp; I haven't gotten that far yet, though.

I think I get it now.

Completions:

<|end_of_text|>

Instructions:

<|eot_id|>

That's how I'm interpreting it at the moment. Feel free to correct me.
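
Concretely, under the official instruct format the assistant turn is closed by <|eot_id|>, while <|end_of_text|> marks the end of a plain completion. A sketch of the assembled single-turn instruct prompt, assuming the chat template quoted further down this thread:

# Hypothetical single-turn instruct prompt for Llama 3.
prompt = (
    "<|begin_of_text|>"
    "<|start_header_id|>user<|end_header_id|>\n\n"
    "What is the capital of France?<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>\n\n"
)
# The instruct model is expected to end its reply with <|eot_id|>;
# a base-model completion instead runs until <|end_of_text|>.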

@bullno1
Copy link
Contributor

bullno1 commented Apr 18, 2024

@teleprint-me Yep, you just have to stop on eot_id instead, which is 128009.

You can use the tokenization tool to test: https://github.com/ggerganov/llama.cpp/blob/master/examples/tokenize/tokenize.cpp

<|begin_of_text|>, <|start_header_id|>, <|end_header_id|>, <|eot_id|> are all mapped correctly.

@dranger003
Copy link
Contributor

This appears to work for chatting with the model (instruct):

./build/bin/main -ngl 33 -c 0 --interactive-first --color -e --in-prefix '<|start_header_id|>user<|end_header_id|>\n\n' --in-suffix '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' -r '<|eot_id|>' -m ggml-meta-llama-3-8b-instruct-f16.gguf

@teleprint-me
Copy link
Contributor

teleprint-me commented Apr 18, 2024

In the original convert.py, before the refactoring, BpeVocab would scan the first line if it could and assume it was a plaintext format.

Now it assumes the huggingface BPE format instead of the general BPE implementation it had originally. These changes keep breaking convert.py, even though any huggingface "features" should be isolated to convert-hf-to-gguf.py or to the HfVocab class... See the referenced PRs below that keep pushing these changes over time.

The current implementation for convert.py with BpeVocab now solely relies upon the huggingface format, which is blocking the conversion process for the torch Llama 3 model.

17:46:36 | /mnt/valerie/remote/ggerganov/llama.cpp
(.venv) git:(llama3-conversion | θ) λ python convert.py --vocab-type bpe /mnt/valerie/models/meta-llama/Meta-Llama-3-8B-Instruct
Loading model file /mnt/valerie/models/meta-llama/Meta-Llama-3-8B-Instruct/consolidated.00.pth
params = Params(n_vocab=128256, n_embd=4096, n_layer=32, n_ctx=4096, n_ff=14336, n_head=32, n_head_kv=8, n_experts=None, n_experts_used=None, f_norm_eps=1e-05, rope_scaling_type=None, f_rope_freq_base=500000.0, f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None, ftype=None, path_model=PosixPath('/mnt/valerie/models/meta-llama/Meta-Llama-3-8B-Instruct'))
Traceback (most recent call last):
  File "/mnt/valerie/remote/ggerganov/llama.cpp/convert.py", line 1555, in <module>
    main()
  File "/mnt/valerie/remote/ggerganov/llama.cpp/convert.py", line 1522, in main
    vocab, special_vocab = vocab_factory.load_vocab(vocab_types, model_parent_path)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/valerie/remote/ggerganov/llama.cpp/convert.py", line 1424, in load_vocab
    vocab = self._create_vocab_by_path(vocab_types)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/valerie/remote/ggerganov/llama.cpp/convert.py", line 1414, in _create_vocab_by_path
    raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")
FileNotFoundError: Could not find a tokenizer matching any of ['bpe']

The issue propagates from the BpeVocab constructor, which assumes a vocab.json and added_tokens.json are present, on the assumption that it is a huggingface model. The facebook/meta models are not; they're PyTorch models built without the transformers framework.

These issues are not related to this PR but are now affecting it.

BPE tokenizer implementations keep me up at night.


Rant aside, the BpeVocab is assuming the huggingface "fast" or "slow" tokenizer, but this is neither.

It should be noted that Llama 1 and Llama 2 used sentencepiece models, while Llama 3 uses a tiktoken implementation and ships a plaintext BPE "model" file.

17:51:49 | /mnt/valerie/models/meta-llama
  λ file Meta-Llama-3-8B-Instruct/tokenizer.model 
Meta-Llama-3-8B-Instruct/tokenizer.model: ASCII text
17:52:05 | /mnt/valerie/models/meta-llama
  λ file /mnt/scsm/models/facebook/llama-2/llama-2-7b/tokenizer.model 
/mnt/scsm/models/facebook/llama-2/llama-2-7b/tokenizer.model: data
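
That plaintext file is the tiktoken rank format: one base64-encoded token followed by its merge rank per line, which is why file reports ASCII text instead of a sentencepiece protobuf. A minimal sketch of parsing it by hand (the path is illustrative):

import base64
from pathlib import Path

def load_ranks(path: str) -> dict[bytes, int]:
    # Each non-empty line is "<base64 token> <rank>".
    ranks: dict[bytes, int] = {}
    for line in Path(path).read_text(encoding="utf-8").splitlines():
        if not line.strip():
            continue
        token_b64, rank = line.split()
        ranks[base64.b64decode(token_b64)] = int(rank)
    return ranks

ranks = load_ranks("Meta-Llama-3-8B-Instruct/tokenizer.model")
print(len(ranks))  # expected: 128000 base tokens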

@kalomaze
Copy link
Contributor

kalomaze commented Apr 19, 2024

cc @dranger003 - I really appreciated your ppl chart visual and measured ppl gap table for different quantization types for CommandR+. Would you be willing to recreate those comparisons on L3 70b (base or Instruct, preferably base)? Thanks

@bullno1
Copy link
Contributor

bullno1 commented Apr 19, 2024

@teleprint-me Are you saying that it's a happy coincidence that the current llama.cpp implementation happens to tokenize correctly, or that there exist character sequences out there that will be tokenized incorrectly?

@ggerganov
Copy link
Owner

Does anyone have convert instructions that work - I'm trying both Meta and HF models using this PR and none of the convert scripts work:

$ python3.11 convert.py ~/Data/llama3/Meta-Llama-3-8B/ --outfile ./models/llama-8b-v3/ggml-model-f16.gguf --outtype f16 --vocab-type bpe

FileNotFoundError: Could not find a tokenizer matching any of ['bpe']
$ python3.11 convert-hf-to-gguf.py ~/Data/huggingface/Meta-Llama-3-8B/ --outfile ./models/llama-8b-v3/ggml-model-f16.gguf --outtype f16

FileNotFoundError: File not found: /Users/ggerganov/Data/huggingface/Meta-Llama-3-8B/tokenizer.model

I see a few other people reporting the same problems. Those who succeeded - what were the necessary changes?

@ddh0
Copy link
Contributor

ddh0 commented Apr 19, 2024

@ggerganov Try --vocab-type bpe with convert-hf-to-gguf.py; that worked for me

@ggerganov
Copy link
Owner

@ddh0 The convert-hf-to-gguf.py script does not support the --vocab-type argument:

$ python3.11 convert-hf-to-gguf.py ~/Data/huggingface/Meta-Llama-3-8B/ --outfile ./models/llama-8b-v3/ggml-model-f16.gguf --outtype f16 --vocab-type bpe

usage: convert-hf-to-gguf.py [-h] [--vocab-only] [--awq-path AWQ_PATH] [--outfile OUTFILE] [--outtype {f32,f16}] [--bigendian] [--use-temp-file] model
convert-hf-to-gguf.py: error: unrecognized arguments: --vocab-type bpe

@dranger003
Copy link
Contributor

@ggerganov These are working on my end.

convert.py works using master and this PR:

python convert.py /models/hub/Meta-Llama-3-8B/ --outfile /models/meta-llama/ggml-meta-llama-3-8b-f16.gguf --vocab-type bpe --outtype f16

convert-hf-to-gguf.py only works using this PR:

python convert-hf-to-gguf.py /models/hub/Meta-Llama-3-8B/ --outfile /models/meta-llama/ggml-meta-llama-3-8b-f16.gguf

@dlyz dlyz mentioned this pull request Apr 30, 2024
@XiongjieDai
Copy link

Hi @abasu0713, I followed your walkthrough and cloned the latest hf model and llama.cpp. I was able to convert the model to GGUF and quantize it, but when I try to run it, I always get the "unable to load model" error. Any thoughts?

# convert the model to ggml FP16 format
!python3 convert.py models/Meta-Llama-3-8B-Instruct/ --outfile models/8B-v3-instruct/ggml-model-f16.gguf --vocab-type bpe --outtype f16
# quantize the model to 4-bits (using Q4_K_M method)
!./quantize models/8B-v3-instruct/ggml-model-f16.gguf models/8B-v3-instruct/ggml-model-Q4_K_M.gguf Q4_K_M

Loading model file models/Meta-Llama-3-8B-Instruct/model-00001-of-00004.safetensors
Loading model file models/Meta-Llama-3-8B-Instruct/model-00001-of-00004.safetensors
Loading model file models/Meta-Llama-3-8B-Instruct/model-00002-of-00004.safetensors
Loading model file models/Meta-Llama-3-8B-Instruct/model-00003-of-00004.safetensors
Loading model file models/Meta-Llama-3-8B-Instruct/model-00004-of-00004.safetensors
params = Params(n_vocab=128256, n_embd=4096, n_layer=32, n_ctx=8192, n_ff=14336, n_head=32, n_head_kv=8, n_experts=None, n_experts_used=None, f_norm_eps=1e-05, rope_scaling_type=None, f_rope_freq_base=500000.0, f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None, ftype=<GGMLFileType.MostlyF16: 1>, path_model=PosixPath('models/Meta-Llama-3-8B-Instruct'))
Loaded vocab file PosixPath('models/Meta-Llama-3-8B-Instruct/tokenizer.json'), type 'bpe'
Vocab info: <BpeVocab with 128000 base tokens and 256 added tokens>
Special vocab info: <SpecialVocab with 280147 merges, special tokens {'bos': 128000, 'eos': 128001}, add special tokens unset>
Permuting layer 0
Permuting layer 1
Permuting layer 2
Permuting layer 3
Permuting layer 4
Permuting layer 5
Permuting layer 6
Permuting layer 7
Permuting layer 8
Permuting layer 9
Permuting layer 10
Permuting layer 11
Permuting layer 12
Permuting layer 13
Permuting layer 14
Permuting layer 15
Permuting layer 16
Permuting layer 17
Permuting layer 18
Permuting layer 19
Permuting layer 20
Permuting layer 21
Permuting layer 22
Permuting layer 23
Permuting layer 24
Permuting layer 25
Permuting layer 26
Permuting layer 27
Permuting layer 28
Permuting layer 29
Permuting layer 30
Permuting layer 31
model.embed_tokens.weight                        -> token_embd.weight                        | BF16   | [128256, 4096]
model.layers.0.input_layernorm.weight            -> blk.0.attn_norm.weight                   | BF16   | [4096]
model.layers.0.mlp.down_proj.weight              -> blk.0.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.0.mlp.gate_proj.weight              -> blk.0.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.0.mlp.up_proj.weight                -> blk.0.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.0.post_attention_layernorm.weight   -> blk.0.ffn_norm.weight                    | BF16   | [4096]
model.layers.0.self_attn.k_proj.weight           -> blk.0.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.0.self_attn.o_proj.weight           -> blk.0.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.0.self_attn.q_proj.weight           -> blk.0.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.0.self_attn.v_proj.weight           -> blk.0.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.1.input_layernorm.weight            -> blk.1.attn_norm.weight                   | BF16   | [4096]
model.layers.1.mlp.down_proj.weight              -> blk.1.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.1.mlp.gate_proj.weight              -> blk.1.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.1.mlp.up_proj.weight                -> blk.1.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.1.post_attention_layernorm.weight   -> blk.1.ffn_norm.weight                    | BF16   | [4096]
model.layers.1.self_attn.k_proj.weight           -> blk.1.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.1.self_attn.o_proj.weight           -> blk.1.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.1.self_attn.q_proj.weight           -> blk.1.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.1.self_attn.v_proj.weight           -> blk.1.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.2.input_layernorm.weight            -> blk.2.attn_norm.weight                   | BF16   | [4096]
model.layers.2.mlp.down_proj.weight              -> blk.2.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.2.mlp.gate_proj.weight              -> blk.2.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.2.mlp.up_proj.weight                -> blk.2.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.2.post_attention_layernorm.weight   -> blk.2.ffn_norm.weight                    | BF16   | [4096]
model.layers.2.self_attn.k_proj.weight           -> blk.2.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.2.self_attn.o_proj.weight           -> blk.2.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.2.self_attn.q_proj.weight           -> blk.2.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.2.self_attn.v_proj.weight           -> blk.2.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.3.input_layernorm.weight            -> blk.3.attn_norm.weight                   | BF16   | [4096]
model.layers.3.mlp.down_proj.weight              -> blk.3.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.3.mlp.gate_proj.weight              -> blk.3.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.3.mlp.up_proj.weight                -> blk.3.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.3.post_attention_layernorm.weight   -> blk.3.ffn_norm.weight                    | BF16   | [4096]
model.layers.3.self_attn.k_proj.weight           -> blk.3.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.3.self_attn.o_proj.weight           -> blk.3.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.3.self_attn.q_proj.weight           -> blk.3.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.3.self_attn.v_proj.weight           -> blk.3.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.4.input_layernorm.weight            -> blk.4.attn_norm.weight                   | BF16   | [4096]
model.layers.4.mlp.down_proj.weight              -> blk.4.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.4.mlp.gate_proj.weight              -> blk.4.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.4.mlp.up_proj.weight                -> blk.4.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.4.post_attention_layernorm.weight   -> blk.4.ffn_norm.weight                    | BF16   | [4096]
model.layers.4.self_attn.k_proj.weight           -> blk.4.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.4.self_attn.o_proj.weight           -> blk.4.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.4.self_attn.q_proj.weight           -> blk.4.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.4.self_attn.v_proj.weight           -> blk.4.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.5.input_layernorm.weight            -> blk.5.attn_norm.weight                   | BF16   | [4096]
model.layers.5.mlp.down_proj.weight              -> blk.5.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.5.mlp.gate_proj.weight              -> blk.5.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.5.mlp.up_proj.weight                -> blk.5.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.5.post_attention_layernorm.weight   -> blk.5.ffn_norm.weight                    | BF16   | [4096]
model.layers.5.self_attn.k_proj.weight           -> blk.5.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.5.self_attn.o_proj.weight           -> blk.5.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.5.self_attn.q_proj.weight           -> blk.5.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.5.self_attn.v_proj.weight           -> blk.5.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.6.input_layernorm.weight            -> blk.6.attn_norm.weight                   | BF16   | [4096]
model.layers.6.mlp.down_proj.weight              -> blk.6.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.6.mlp.gate_proj.weight              -> blk.6.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.6.mlp.up_proj.weight                -> blk.6.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.6.post_attention_layernorm.weight   -> blk.6.ffn_norm.weight                    | BF16   | [4096]
model.layers.6.self_attn.k_proj.weight           -> blk.6.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.6.self_attn.o_proj.weight           -> blk.6.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.6.self_attn.q_proj.weight           -> blk.6.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.6.self_attn.v_proj.weight           -> blk.6.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.7.input_layernorm.weight            -> blk.7.attn_norm.weight                   | BF16   | [4096]
model.layers.7.mlp.down_proj.weight              -> blk.7.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.7.mlp.gate_proj.weight              -> blk.7.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.7.mlp.up_proj.weight                -> blk.7.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.7.post_attention_layernorm.weight   -> blk.7.ffn_norm.weight                    | BF16   | [4096]
model.layers.7.self_attn.k_proj.weight           -> blk.7.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.7.self_attn.o_proj.weight           -> blk.7.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.7.self_attn.q_proj.weight           -> blk.7.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.7.self_attn.v_proj.weight           -> blk.7.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.8.input_layernorm.weight            -> blk.8.attn_norm.weight                   | BF16   | [4096]
model.layers.8.mlp.down_proj.weight              -> blk.8.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.8.mlp.gate_proj.weight              -> blk.8.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.8.mlp.up_proj.weight                -> blk.8.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.8.post_attention_layernorm.weight   -> blk.8.ffn_norm.weight                    | BF16   | [4096]
model.layers.8.self_attn.k_proj.weight           -> blk.8.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.8.self_attn.o_proj.weight           -> blk.8.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.8.self_attn.q_proj.weight           -> blk.8.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.8.self_attn.v_proj.weight           -> blk.8.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.10.input_layernorm.weight           -> blk.10.attn_norm.weight                  | BF16   | [4096]
model.layers.10.mlp.down_proj.weight             -> blk.10.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.10.mlp.gate_proj.weight             -> blk.10.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.10.mlp.up_proj.weight               -> blk.10.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.10.post_attention_layernorm.weight  -> blk.10.ffn_norm.weight                   | BF16   | [4096]
model.layers.10.self_attn.k_proj.weight          -> blk.10.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.10.self_attn.o_proj.weight          -> blk.10.attn_output.weight                | BF16   | [4096, 4096]
model.layers.10.self_attn.q_proj.weight          -> blk.10.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.10.self_attn.v_proj.weight          -> blk.10.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.11.input_layernorm.weight           -> blk.11.attn_norm.weight                  | BF16   | [4096]
model.layers.11.mlp.down_proj.weight             -> blk.11.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.11.mlp.gate_proj.weight             -> blk.11.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.11.mlp.up_proj.weight               -> blk.11.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.11.post_attention_layernorm.weight  -> blk.11.ffn_norm.weight                   | BF16   | [4096]
model.layers.11.self_attn.k_proj.weight          -> blk.11.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.11.self_attn.o_proj.weight          -> blk.11.attn_output.weight                | BF16   | [4096, 4096]
model.layers.11.self_attn.q_proj.weight          -> blk.11.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.11.self_attn.v_proj.weight          -> blk.11.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.12.input_layernorm.weight           -> blk.12.attn_norm.weight                  | BF16   | [4096]
model.layers.12.mlp.down_proj.weight             -> blk.12.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.12.mlp.gate_proj.weight             -> blk.12.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.12.mlp.up_proj.weight               -> blk.12.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.12.post_attention_layernorm.weight  -> blk.12.ffn_norm.weight                   | BF16   | [4096]
model.layers.12.self_attn.k_proj.weight          -> blk.12.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.12.self_attn.o_proj.weight          -> blk.12.attn_output.weight                | BF16   | [4096, 4096]
model.layers.12.self_attn.q_proj.weight          -> blk.12.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.12.self_attn.v_proj.weight          -> blk.12.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.13.input_layernorm.weight           -> blk.13.attn_norm.weight                  | BF16   | [4096]
model.layers.13.mlp.down_proj.weight             -> blk.13.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.13.mlp.gate_proj.weight             -> blk.13.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.13.mlp.up_proj.weight               -> blk.13.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.13.post_attention_layernorm.weight  -> blk.13.ffn_norm.weight                   | BF16   | [4096]
model.layers.13.self_attn.k_proj.weight          -> blk.13.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.13.self_attn.o_proj.weight          -> blk.13.attn_output.weight                | BF16   | [4096, 4096]
model.layers.13.self_attn.q_proj.weight          -> blk.13.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.13.self_attn.v_proj.weight          -> blk.13.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.14.input_layernorm.weight           -> blk.14.attn_norm.weight                  | BF16   | [4096]
model.layers.14.mlp.down_proj.weight             -> blk.14.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.14.mlp.gate_proj.weight             -> blk.14.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.14.mlp.up_proj.weight               -> blk.14.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.14.post_attention_layernorm.weight  -> blk.14.ffn_norm.weight                   | BF16   | [4096]
model.layers.14.self_attn.k_proj.weight          -> blk.14.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.14.self_attn.o_proj.weight          -> blk.14.attn_output.weight                | BF16   | [4096, 4096]
model.layers.14.self_attn.q_proj.weight          -> blk.14.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.14.self_attn.v_proj.weight          -> blk.14.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.15.input_layernorm.weight           -> blk.15.attn_norm.weight                  | BF16   | [4096]
model.layers.15.mlp.down_proj.weight             -> blk.15.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.15.mlp.gate_proj.weight             -> blk.15.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.15.mlp.up_proj.weight               -> blk.15.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.15.post_attention_layernorm.weight  -> blk.15.ffn_norm.weight                   | BF16   | [4096]
model.layers.15.self_attn.k_proj.weight          -> blk.15.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.15.self_attn.o_proj.weight          -> blk.15.attn_output.weight                | BF16   | [4096, 4096]
model.layers.15.self_attn.q_proj.weight          -> blk.15.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.15.self_attn.v_proj.weight          -> blk.15.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.16.input_layernorm.weight           -> blk.16.attn_norm.weight                  | BF16   | [4096]
model.layers.16.mlp.down_proj.weight             -> blk.16.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.16.mlp.gate_proj.weight             -> blk.16.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.16.mlp.up_proj.weight               -> blk.16.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.16.post_attention_layernorm.weight  -> blk.16.ffn_norm.weight                   | BF16   | [4096]
model.layers.16.self_attn.k_proj.weight          -> blk.16.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.16.self_attn.o_proj.weight          -> blk.16.attn_output.weight                | BF16   | [4096, 4096]
model.layers.16.self_attn.q_proj.weight          -> blk.16.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.16.self_attn.v_proj.weight          -> blk.16.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.17.input_layernorm.weight           -> blk.17.attn_norm.weight                  | BF16   | [4096]
model.layers.17.mlp.down_proj.weight             -> blk.17.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.17.mlp.gate_proj.weight             -> blk.17.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.17.mlp.up_proj.weight               -> blk.17.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.17.post_attention_layernorm.weight  -> blk.17.ffn_norm.weight                   | BF16   | [4096]
model.layers.17.self_attn.k_proj.weight          -> blk.17.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.17.self_attn.o_proj.weight          -> blk.17.attn_output.weight                | BF16   | [4096, 4096]
model.layers.17.self_attn.q_proj.weight          -> blk.17.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.17.self_attn.v_proj.weight          -> blk.17.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.18.input_layernorm.weight           -> blk.18.attn_norm.weight                  | BF16   | [4096]
model.layers.18.mlp.down_proj.weight             -> blk.18.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.18.mlp.gate_proj.weight             -> blk.18.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.18.mlp.up_proj.weight               -> blk.18.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.18.post_attention_layernorm.weight  -> blk.18.ffn_norm.weight                   | BF16   | [4096]
model.layers.18.self_attn.k_proj.weight          -> blk.18.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.18.self_attn.o_proj.weight          -> blk.18.attn_output.weight                | BF16   | [4096, 4096]
model.layers.18.self_attn.q_proj.weight          -> blk.18.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.18.self_attn.v_proj.weight          -> blk.18.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.19.input_layernorm.weight           -> blk.19.attn_norm.weight                  | BF16   | [4096]
model.layers.19.mlp.down_proj.weight             -> blk.19.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.19.mlp.gate_proj.weight             -> blk.19.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.19.mlp.up_proj.weight               -> blk.19.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.19.post_attention_layernorm.weight  -> blk.19.ffn_norm.weight                   | BF16   | [4096]
model.layers.19.self_attn.k_proj.weight          -> blk.19.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.19.self_attn.o_proj.weight          -> blk.19.attn_output.weight                | BF16   | [4096, 4096]
model.layers.19.self_attn.q_proj.weight          -> blk.19.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.19.self_attn.v_proj.weight          -> blk.19.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.20.mlp.gate_proj.weight             -> blk.20.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.20.self_attn.k_proj.weight          -> blk.20.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.20.self_attn.o_proj.weight          -> blk.20.attn_output.weight                | BF16   | [4096, 4096]
model.layers.20.self_attn.q_proj.weight          -> blk.20.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.20.self_attn.v_proj.weight          -> blk.20.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.9.input_layernorm.weight            -> blk.9.attn_norm.weight                   | BF16   | [4096]
model.layers.9.mlp.down_proj.weight              -> blk.9.ffn_down.weight                    | BF16   | [4096, 14336]
model.layers.9.mlp.gate_proj.weight              -> blk.9.ffn_gate.weight                    | BF16   | [14336, 4096]
model.layers.9.mlp.up_proj.weight                -> blk.9.ffn_up.weight                      | BF16   | [14336, 4096]
model.layers.9.post_attention_layernorm.weight   -> blk.9.ffn_norm.weight                    | BF16   | [4096]
model.layers.9.self_attn.k_proj.weight           -> blk.9.attn_k.weight                      | BF16   | [1024, 4096]
model.layers.9.self_attn.o_proj.weight           -> blk.9.attn_output.weight                 | BF16   | [4096, 4096]
model.layers.9.self_attn.q_proj.weight           -> blk.9.attn_q.weight                      | BF16   | [4096, 4096]
model.layers.9.self_attn.v_proj.weight           -> blk.9.attn_v.weight                      | BF16   | [1024, 4096]
model.layers.20.input_layernorm.weight           -> blk.20.attn_norm.weight                  | BF16   | [4096]
model.layers.20.mlp.down_proj.weight             -> blk.20.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.20.mlp.up_proj.weight               -> blk.20.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.20.post_attention_layernorm.weight  -> blk.20.ffn_norm.weight                   | BF16   | [4096]
model.layers.21.input_layernorm.weight           -> blk.21.attn_norm.weight                  | BF16   | [4096]
model.layers.21.mlp.down_proj.weight             -> blk.21.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.21.mlp.gate_proj.weight             -> blk.21.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.21.mlp.up_proj.weight               -> blk.21.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.21.post_attention_layernorm.weight  -> blk.21.ffn_norm.weight                   | BF16   | [4096]
model.layers.21.self_attn.k_proj.weight          -> blk.21.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.21.self_attn.o_proj.weight          -> blk.21.attn_output.weight                | BF16   | [4096, 4096]
model.layers.21.self_attn.q_proj.weight          -> blk.21.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.21.self_attn.v_proj.weight          -> blk.21.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.22.input_layernorm.weight           -> blk.22.attn_norm.weight                  | BF16   | [4096]
model.layers.22.mlp.down_proj.weight             -> blk.22.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.22.mlp.gate_proj.weight             -> blk.22.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.22.mlp.up_proj.weight               -> blk.22.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.22.post_attention_layernorm.weight  -> blk.22.ffn_norm.weight                   | BF16   | [4096]
model.layers.22.self_attn.k_proj.weight          -> blk.22.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.22.self_attn.o_proj.weight          -> blk.22.attn_output.weight                | BF16   | [4096, 4096]
model.layers.22.self_attn.q_proj.weight          -> blk.22.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.22.self_attn.v_proj.weight          -> blk.22.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.23.input_layernorm.weight           -> blk.23.attn_norm.weight                  | BF16   | [4096]
model.layers.23.mlp.down_proj.weight             -> blk.23.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.23.mlp.gate_proj.weight             -> blk.23.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.23.mlp.up_proj.weight               -> blk.23.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.23.post_attention_layernorm.weight  -> blk.23.ffn_norm.weight                   | BF16   | [4096]
model.layers.23.self_attn.k_proj.weight          -> blk.23.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.23.self_attn.o_proj.weight          -> blk.23.attn_output.weight                | BF16   | [4096, 4096]
model.layers.23.self_attn.q_proj.weight          -> blk.23.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.23.self_attn.v_proj.weight          -> blk.23.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.24.input_layernorm.weight           -> blk.24.attn_norm.weight                  | BF16   | [4096]
model.layers.24.mlp.down_proj.weight             -> blk.24.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.24.mlp.gate_proj.weight             -> blk.24.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.24.mlp.up_proj.weight               -> blk.24.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.24.post_attention_layernorm.weight  -> blk.24.ffn_norm.weight                   | BF16   | [4096]
model.layers.24.self_attn.k_proj.weight          -> blk.24.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.24.self_attn.o_proj.weight          -> blk.24.attn_output.weight                | BF16   | [4096, 4096]
model.layers.24.self_attn.q_proj.weight          -> blk.24.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.24.self_attn.v_proj.weight          -> blk.24.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.25.input_layernorm.weight           -> blk.25.attn_norm.weight                  | BF16   | [4096]
model.layers.25.mlp.down_proj.weight             -> blk.25.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.25.mlp.gate_proj.weight             -> blk.25.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.25.mlp.up_proj.weight               -> blk.25.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.25.post_attention_layernorm.weight  -> blk.25.ffn_norm.weight                   | BF16   | [4096]
model.layers.25.self_attn.k_proj.weight          -> blk.25.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.25.self_attn.o_proj.weight          -> blk.25.attn_output.weight                | BF16   | [4096, 4096]
model.layers.25.self_attn.q_proj.weight          -> blk.25.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.25.self_attn.v_proj.weight          -> blk.25.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.26.input_layernorm.weight           -> blk.26.attn_norm.weight                  | BF16   | [4096]
model.layers.26.mlp.down_proj.weight             -> blk.26.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.26.mlp.gate_proj.weight             -> blk.26.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.26.mlp.up_proj.weight               -> blk.26.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.26.post_attention_layernorm.weight  -> blk.26.ffn_norm.weight                   | BF16   | [4096]
model.layers.26.self_attn.k_proj.weight          -> blk.26.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.26.self_attn.o_proj.weight          -> blk.26.attn_output.weight                | BF16   | [4096, 4096]
model.layers.26.self_attn.q_proj.weight          -> blk.26.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.26.self_attn.v_proj.weight          -> blk.26.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.27.input_layernorm.weight           -> blk.27.attn_norm.weight                  | BF16   | [4096]
model.layers.27.mlp.down_proj.weight             -> blk.27.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.27.mlp.gate_proj.weight             -> blk.27.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.27.mlp.up_proj.weight               -> blk.27.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.27.post_attention_layernorm.weight  -> blk.27.ffn_norm.weight                   | BF16   | [4096]
model.layers.27.self_attn.k_proj.weight          -> blk.27.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.27.self_attn.o_proj.weight          -> blk.27.attn_output.weight                | BF16   | [4096, 4096]
model.layers.27.self_attn.q_proj.weight          -> blk.27.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.27.self_attn.v_proj.weight          -> blk.27.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.28.input_layernorm.weight           -> blk.28.attn_norm.weight                  | BF16   | [4096]
model.layers.28.mlp.down_proj.weight             -> blk.28.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.28.mlp.gate_proj.weight             -> blk.28.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.28.mlp.up_proj.weight               -> blk.28.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.28.post_attention_layernorm.weight  -> blk.28.ffn_norm.weight                   | BF16   | [4096]
model.layers.28.self_attn.k_proj.weight          -> blk.28.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.28.self_attn.o_proj.weight          -> blk.28.attn_output.weight                | BF16   | [4096, 4096]
model.layers.28.self_attn.q_proj.weight          -> blk.28.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.28.self_attn.v_proj.weight          -> blk.28.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.29.input_layernorm.weight           -> blk.29.attn_norm.weight                  | BF16   | [4096]
model.layers.29.mlp.down_proj.weight             -> blk.29.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.29.mlp.gate_proj.weight             -> blk.29.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.29.mlp.up_proj.weight               -> blk.29.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.29.post_attention_layernorm.weight  -> blk.29.ffn_norm.weight                   | BF16   | [4096]
model.layers.29.self_attn.k_proj.weight          -> blk.29.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.29.self_attn.o_proj.weight          -> blk.29.attn_output.weight                | BF16   | [4096, 4096]
model.layers.29.self_attn.q_proj.weight          -> blk.29.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.29.self_attn.v_proj.weight          -> blk.29.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.30.input_layernorm.weight           -> blk.30.attn_norm.weight                  | BF16   | [4096]
model.layers.30.mlp.down_proj.weight             -> blk.30.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.30.mlp.gate_proj.weight             -> blk.30.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.30.mlp.up_proj.weight               -> blk.30.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.30.post_attention_layernorm.weight  -> blk.30.ffn_norm.weight                   | BF16   | [4096]
model.layers.30.self_attn.k_proj.weight          -> blk.30.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.30.self_attn.o_proj.weight          -> blk.30.attn_output.weight                | BF16   | [4096, 4096]
model.layers.30.self_attn.q_proj.weight          -> blk.30.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.30.self_attn.v_proj.weight          -> blk.30.attn_v.weight                     | BF16   | [1024, 4096]
model.layers.31.mlp.gate_proj.weight             -> blk.31.ffn_gate.weight                   | BF16   | [14336, 4096]
model.layers.31.mlp.up_proj.weight               -> blk.31.ffn_up.weight                     | BF16   | [14336, 4096]
model.layers.31.self_attn.k_proj.weight          -> blk.31.attn_k.weight                     | BF16   | [1024, 4096]
model.layers.31.self_attn.o_proj.weight          -> blk.31.attn_output.weight                | BF16   | [4096, 4096]
model.layers.31.self_attn.q_proj.weight          -> blk.31.attn_q.weight                     | BF16   | [4096, 4096]
model.layers.31.self_attn.v_proj.weight          -> blk.31.attn_v.weight                     | BF16   | [1024, 4096]
lm_head.weight                                   -> output.weight                            | BF16   | [128256, 4096]
model.layers.31.input_layernorm.weight           -> blk.31.attn_norm.weight                  | BF16   | [4096]
model.layers.31.mlp.down_proj.weight             -> blk.31.ffn_down.weight                   | BF16   | [4096, 14336]
model.layers.31.post_attention_layernorm.weight  -> blk.31.ffn_norm.weight                   | BF16   | [4096]
model.norm.weight                                -> output_norm.weight                       | BF16   | [4096]
Writing models/8B-v3-instruct/ggml-model-f16.gguf, format 1
Ignoring added_tokens.json since model matches vocab size without it.
gguf: This GGUF file is for Little Endian only
gguf: Adding 280147 merge(s).
gguf: Setting special token type bos to 128000
gguf: Setting special token type eos to 128001
gguf: Setting chat_template to {% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>

'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>

' }}{% endif %}
[  1/291] Writing tensor token_embd.weight                      | size 128256 x   4096  | type F16  | T+   1
[  2/291] Writing tensor blk.0.attn_norm.weight                 | size   4096           | type F32  | T+   1
[  3/291] Writing tensor blk.0.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   1
[  4/291] Writing tensor blk.0.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   1
[  5/291] Writing tensor blk.0.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   1
[  6/291] Writing tensor blk.0.ffn_norm.weight                  | size   4096           | type F32  | T+   1
[  7/291] Writing tensor blk.0.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   1
[  8/291] Writing tensor blk.0.attn_output.weight               | size   4096 x   4096  | type F16  | T+   1
[  9/291] Writing tensor blk.0.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   1
[ 10/291] Writing tensor blk.0.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   1
[ 11/291] Writing tensor blk.1.attn_norm.weight                 | size   4096           | type F32  | T+   1
[ 12/291] Writing tensor blk.1.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   1
[ 13/291] Writing tensor blk.1.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   1
[ 14/291] Writing tensor blk.1.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   1
[ 15/291] Writing tensor blk.1.ffn_norm.weight                  | size   4096           | type F32  | T+   1
[ 16/291] Writing tensor blk.1.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   1
[ 17/291] Writing tensor blk.1.attn_output.weight               | size   4096 x   4096  | type F16  | T+   1
[ 18/291] Writing tensor blk.1.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   1
[ 19/291] Writing tensor blk.1.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   1
[ 20/291] Writing tensor blk.2.attn_norm.weight                 | size   4096           | type F32  | T+   1
[ 21/291] Writing tensor blk.2.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   1
[ 22/291] Writing tensor blk.2.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   1
[ 23/291] Writing tensor blk.2.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   1
[ 24/291] Writing tensor blk.2.ffn_norm.weight                  | size   4096           | type F32  | T+   1
[ 25/291] Writing tensor blk.2.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   1
[ 26/291] Writing tensor blk.2.attn_output.weight               | size   4096 x   4096  | type F16  | T+   1
[ 27/291] Writing tensor blk.2.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   1
[ 28/291] Writing tensor blk.2.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   1
[ 29/291] Writing tensor blk.3.attn_norm.weight                 | size   4096           | type F32  | T+   1
[ 30/291] Writing tensor blk.3.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   2
[ 31/291] Writing tensor blk.3.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   2
[ 32/291] Writing tensor blk.3.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   2
[ 33/291] Writing tensor blk.3.ffn_norm.weight                  | size   4096           | type F32  | T+   2
[ 34/291] Writing tensor blk.3.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 35/291] Writing tensor blk.3.attn_output.weight               | size   4096 x   4096  | type F16  | T+   2
[ 36/291] Writing tensor blk.3.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   2
[ 37/291] Writing tensor blk.3.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 38/291] Writing tensor blk.4.attn_norm.weight                 | size   4096           | type F32  | T+   2
[ 39/291] Writing tensor blk.4.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   2
[ 40/291] Writing tensor blk.4.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   2
[ 41/291] Writing tensor blk.4.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   2
[ 42/291] Writing tensor blk.4.ffn_norm.weight                  | size   4096           | type F32  | T+   2
[ 43/291] Writing tensor blk.4.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 44/291] Writing tensor blk.4.attn_output.weight               | size   4096 x   4096  | type F16  | T+   2
[ 45/291] Writing tensor blk.4.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   2
[ 46/291] Writing tensor blk.4.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 47/291] Writing tensor blk.5.attn_norm.weight                 | size   4096           | type F32  | T+   2
[ 48/291] Writing tensor blk.5.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   2
[ 49/291] Writing tensor blk.5.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   2
[ 50/291] Writing tensor blk.5.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   2
[ 51/291] Writing tensor blk.5.ffn_norm.weight                  | size   4096           | type F32  | T+   2
[ 52/291] Writing tensor blk.5.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 53/291] Writing tensor blk.5.attn_output.weight               | size   4096 x   4096  | type F16  | T+   2
[ 54/291] Writing tensor blk.5.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   2
[ 55/291] Writing tensor blk.5.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 56/291] Writing tensor blk.6.attn_norm.weight                 | size   4096           | type F32  | T+   2
[ 57/291] Writing tensor blk.6.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   2
[ 58/291] Writing tensor blk.6.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   2
[ 59/291] Writing tensor blk.6.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   2
[ 60/291] Writing tensor blk.6.ffn_norm.weight                  | size   4096           | type F32  | T+   2
[ 61/291] Writing tensor blk.6.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 62/291] Writing tensor blk.6.attn_output.weight               | size   4096 x   4096  | type F16  | T+   2
[ 63/291] Writing tensor blk.6.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   2
[ 64/291] Writing tensor blk.6.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 65/291] Writing tensor blk.7.attn_norm.weight                 | size   4096           | type F32  | T+   2
[ 66/291] Writing tensor blk.7.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   2
[ 67/291] Writing tensor blk.7.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   2
[ 68/291] Writing tensor blk.7.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   2
[ 69/291] Writing tensor blk.7.ffn_norm.weight                  | size   4096           | type F32  | T+   2
[ 70/291] Writing tensor blk.7.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 71/291] Writing tensor blk.7.attn_output.weight               | size   4096 x   4096  | type F16  | T+   2
[ 72/291] Writing tensor blk.7.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   2
[ 73/291] Writing tensor blk.7.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   2
[ 74/291] Writing tensor blk.8.attn_norm.weight                 | size   4096           | type F32  | T+   2
[ 75/291] Writing tensor blk.8.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   3
[ 76/291] Writing tensor blk.8.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   3
[ 77/291] Writing tensor blk.8.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   3
[ 78/291] Writing tensor blk.8.ffn_norm.weight                  | size   4096           | type F32  | T+   3
[ 79/291] Writing tensor blk.8.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   3
[ 80/291] Writing tensor blk.8.attn_output.weight               | size   4096 x   4096  | type F16  | T+   3
[ 81/291] Writing tensor blk.8.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   3
[ 82/291] Writing tensor blk.8.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   3
[ 83/291] Writing tensor blk.10.attn_norm.weight                | size   4096           | type F32  | T+   3
[ 84/291] Writing tensor blk.10.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   3
[ 85/291] Writing tensor blk.10.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   3
[ 86/291] Writing tensor blk.10.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   3
[ 87/291] Writing tensor blk.10.ffn_norm.weight                 | size   4096           | type F32  | T+   3
[ 88/291] Writing tensor blk.10.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   3
[ 89/291] Writing tensor blk.10.attn_output.weight              | size   4096 x   4096  | type F16  | T+   3
[ 90/291] Writing tensor blk.10.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   3
[ 91/291] Writing tensor blk.10.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   3
[ 92/291] Writing tensor blk.11.attn_norm.weight                | size   4096           | type F32  | T+   3
[ 93/291] Writing tensor blk.11.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   3
[ 94/291] Writing tensor blk.11.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   3
[ 95/291] Writing tensor blk.11.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   3
[ 96/291] Writing tensor blk.11.ffn_norm.weight                 | size   4096           | type F32  | T+   3
[ 97/291] Writing tensor blk.11.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   3
[ 98/291] Writing tensor blk.11.attn_output.weight              | size   4096 x   4096  | type F16  | T+   3
[ 99/291] Writing tensor blk.11.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   3
[100/291] Writing tensor blk.11.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   3
[101/291] Writing tensor blk.12.attn_norm.weight                | size   4096           | type F32  | T+   3
[102/291] Writing tensor blk.12.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   3
[103/291] Writing tensor blk.12.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   3
[104/291] Writing tensor blk.12.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   3
[105/291] Writing tensor blk.12.ffn_norm.weight                 | size   4096           | type F32  | T+   3
[106/291] Writing tensor blk.12.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   3
[107/291] Writing tensor blk.12.attn_output.weight              | size   4096 x   4096  | type F16  | T+   3
[108/291] Writing tensor blk.12.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   3
[109/291] Writing tensor blk.12.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   3
[110/291] Writing tensor blk.13.attn_norm.weight                | size   4096           | type F32  | T+   3
[111/291] Writing tensor blk.13.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   3
[112/291] Writing tensor blk.13.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   3
[113/291] Writing tensor blk.13.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   3
[114/291] Writing tensor blk.13.ffn_norm.weight                 | size   4096           | type F32  | T+   3
[115/291] Writing tensor blk.13.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   3
[116/291] Writing tensor blk.13.attn_output.weight              | size   4096 x   4096  | type F16  | T+   3
[117/291] Writing tensor blk.13.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   3
[118/291] Writing tensor blk.13.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   3
[119/291] Writing tensor blk.14.attn_norm.weight                | size   4096           | type F32  | T+   3
[120/291] Writing tensor blk.14.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   3
[121/291] Writing tensor blk.14.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   3
[122/291] Writing tensor blk.14.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   3
[123/291] Writing tensor blk.14.ffn_norm.weight                 | size   4096           | type F32  | T+   4
[124/291] Writing tensor blk.14.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   4
[125/291] Writing tensor blk.14.attn_output.weight              | size   4096 x   4096  | type F16  | T+   4
[126/291] Writing tensor blk.14.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   4
[127/291] Writing tensor blk.14.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   4
[128/291] Writing tensor blk.15.attn_norm.weight                | size   4096           | type F32  | T+   4
[129/291] Writing tensor blk.15.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   4
[130/291] Writing tensor blk.15.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   4
[131/291] Writing tensor blk.15.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   4
[132/291] Writing tensor blk.15.ffn_norm.weight                 | size   4096           | type F32  | T+   4
[133/291] Writing tensor blk.15.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   4
[134/291] Writing tensor blk.15.attn_output.weight              | size   4096 x   4096  | type F16  | T+   4
[135/291] Writing tensor blk.15.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   4
[136/291] Writing tensor blk.15.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   4
[137/291] Writing tensor blk.16.attn_norm.weight                | size   4096           | type F32  | T+   4
[138/291] Writing tensor blk.16.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   4
[139/291] Writing tensor blk.16.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   4
[140/291] Writing tensor blk.16.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   4
[141/291] Writing tensor blk.16.ffn_norm.weight                 | size   4096           | type F32  | T+   4
[142/291] Writing tensor blk.16.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   4
[143/291] Writing tensor blk.16.attn_output.weight              | size   4096 x   4096  | type F16  | T+   4
[144/291] Writing tensor blk.16.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   4
[145/291] Writing tensor blk.16.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   4
[146/291] Writing tensor blk.17.attn_norm.weight                | size   4096           | type F32  | T+   4
[147/291] Writing tensor blk.17.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   4
[148/291] Writing tensor blk.17.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   4
[149/291] Writing tensor blk.17.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   4
[150/291] Writing tensor blk.17.ffn_norm.weight                 | size   4096           | type F32  | T+   4
[151/291] Writing tensor blk.17.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   4
[152/291] Writing tensor blk.17.attn_output.weight              | size   4096 x   4096  | type F16  | T+   4
[153/291] Writing tensor blk.17.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   4
[154/291] Writing tensor blk.17.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   4
[155/291] Writing tensor blk.18.attn_norm.weight                | size   4096           | type F32  | T+   4
[156/291] Writing tensor blk.18.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   4
[157/291] Writing tensor blk.18.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   4
[158/291] Writing tensor blk.18.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   4
[159/291] Writing tensor blk.18.ffn_norm.weight                 | size   4096           | type F32  | T+   4
[160/291] Writing tensor blk.18.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   4
[161/291] Writing tensor blk.18.attn_output.weight              | size   4096 x   4096  | type F16  | T+   4
[162/291] Writing tensor blk.18.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   4
[163/291] Writing tensor blk.18.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   4
[164/291] Writing tensor blk.19.attn_norm.weight                | size   4096           | type F32  | T+   4
[165/291] Writing tensor blk.19.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   4
[166/291] Writing tensor blk.19.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   4
[167/291] Writing tensor blk.19.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   4
[168/291] Writing tensor blk.19.ffn_norm.weight                 | size   4096           | type F32  | T+   4
[169/291] Writing tensor blk.19.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   4
[170/291] Writing tensor blk.19.attn_output.weight              | size   4096 x   4096  | type F16  | T+   4
[171/291] Writing tensor blk.19.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   4
[172/291] Writing tensor blk.19.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   4
[173/291] Writing tensor blk.20.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   5
[174/291] Writing tensor blk.20.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   5
[175/291] Writing tensor blk.20.attn_output.weight              | size   4096 x   4096  | type F16  | T+   5
[176/291] Writing tensor blk.20.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   5
[177/291] Writing tensor blk.20.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   5
[178/291] Writing tensor blk.9.attn_norm.weight                 | size   4096           | type F32  | T+   5
[179/291] Writing tensor blk.9.ffn_down.weight                  | size   4096 x  14336  | type F16  | T+   5
[180/291] Writing tensor blk.9.ffn_gate.weight                  | size  14336 x   4096  | type F16  | T+   5
[181/291] Writing tensor blk.9.ffn_up.weight                    | size  14336 x   4096  | type F16  | T+   5
[182/291] Writing tensor blk.9.ffn_norm.weight                  | size   4096           | type F32  | T+   5
[183/291] Writing tensor blk.9.attn_k.weight                    | size   1024 x   4096  | type F16  | T+   5
[184/291] Writing tensor blk.9.attn_output.weight               | size   4096 x   4096  | type F16  | T+   5
[185/291] Writing tensor blk.9.attn_q.weight                    | size   4096 x   4096  | type F16  | T+   5
[186/291] Writing tensor blk.9.attn_v.weight                    | size   1024 x   4096  | type F16  | T+   5
[187/291] Writing tensor blk.20.attn_norm.weight                | size   4096           | type F32  | T+   5
[188/291] Writing tensor blk.20.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   5
[189/291] Writing tensor blk.20.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   5
[190/291] Writing tensor blk.20.ffn_norm.weight                 | size   4096           | type F32  | T+   5
[191/291] Writing tensor blk.21.attn_norm.weight                | size   4096           | type F32  | T+   5
[192/291] Writing tensor blk.21.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   5
[193/291] Writing tensor blk.21.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   5
[194/291] Writing tensor blk.21.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   5
[195/291] Writing tensor blk.21.ffn_norm.weight                 | size   4096           | type F32  | T+   5
[196/291] Writing tensor blk.21.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   5
[197/291] Writing tensor blk.21.attn_output.weight              | size   4096 x   4096  | type F16  | T+   5
[198/291] Writing tensor blk.21.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   5
[199/291] Writing tensor blk.21.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   5
[200/291] Writing tensor blk.22.attn_norm.weight                | size   4096           | type F32  | T+   5
[201/291] Writing tensor blk.22.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   5
[202/291] Writing tensor blk.22.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   5
[203/291] Writing tensor blk.22.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   5
[204/291] Writing tensor blk.22.ffn_norm.weight                 | size   4096           | type F32  | T+   5
[205/291] Writing tensor blk.22.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   5
[206/291] Writing tensor blk.22.attn_output.weight              | size   4096 x   4096  | type F16  | T+   5
[207/291] Writing tensor blk.22.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   5
[208/291] Writing tensor blk.22.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   5
[209/291] Writing tensor blk.23.attn_norm.weight                | size   4096           | type F32  | T+   5
[210/291] Writing tensor blk.23.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   5
[211/291] Writing tensor blk.23.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   5
[212/291] Writing tensor blk.23.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   5
[213/291] Writing tensor blk.23.ffn_norm.weight                 | size   4096           | type F32  | T+   5
[214/291] Writing tensor blk.23.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   5
[215/291] Writing tensor blk.23.attn_output.weight              | size   4096 x   4096  | type F16  | T+   5
[216/291] Writing tensor blk.23.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   5
[217/291] Writing tensor blk.23.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   5
[218/291] Writing tensor blk.24.attn_norm.weight                | size   4096           | type F32  | T+   5
[219/291] Writing tensor blk.24.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   5
[220/291] Writing tensor blk.24.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   5
[221/291] Writing tensor blk.24.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   5
[222/291] Writing tensor blk.24.ffn_norm.weight                 | size   4096           | type F32  | T+   6
[223/291] Writing tensor blk.24.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   6
[224/291] Writing tensor blk.24.attn_output.weight              | size   4096 x   4096  | type F16  | T+   6
[225/291] Writing tensor blk.24.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   6
[226/291] Writing tensor blk.24.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   6
[227/291] Writing tensor blk.25.attn_norm.weight                | size   4096           | type F32  | T+   6
[228/291] Writing tensor blk.25.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   6
[229/291] Writing tensor blk.25.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   6
[230/291] Writing tensor blk.25.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   6
[231/291] Writing tensor blk.25.ffn_norm.weight                 | size   4096           | type F32  | T+   6
[232/291] Writing tensor blk.25.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   6
[233/291] Writing tensor blk.25.attn_output.weight              | size   4096 x   4096  | type F16  | T+   6
[234/291] Writing tensor blk.25.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   6
[235/291] Writing tensor blk.25.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   6
[236/291] Writing tensor blk.26.attn_norm.weight                | size   4096           | type F32  | T+   6
[237/291] Writing tensor blk.26.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   6
[238/291] Writing tensor blk.26.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   6
[239/291] Writing tensor blk.26.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   6
[240/291] Writing tensor blk.26.ffn_norm.weight                 | size   4096           | type F32  | T+   6
[241/291] Writing tensor blk.26.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   6
[242/291] Writing tensor blk.26.attn_output.weight              | size   4096 x   4096  | type F16  | T+   6
[243/291] Writing tensor blk.26.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   6
[244/291] Writing tensor blk.26.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   6
[245/291] Writing tensor blk.27.attn_norm.weight                | size   4096           | type F32  | T+   6
[246/291] Writing tensor blk.27.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   6
[247/291] Writing tensor blk.27.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   6
[248/291] Writing tensor blk.27.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   6
[249/291] Writing tensor blk.27.ffn_norm.weight                 | size   4096           | type F32  | T+   6
[250/291] Writing tensor blk.27.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   6
[251/291] Writing tensor blk.27.attn_output.weight              | size   4096 x   4096  | type F16  | T+   6
[252/291] Writing tensor blk.27.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   6
[253/291] Writing tensor blk.27.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   6
[254/291] Writing tensor blk.28.attn_norm.weight                | size   4096           | type F32  | T+   6
[255/291] Writing tensor blk.28.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   6
[256/291] Writing tensor blk.28.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   6
[257/291] Writing tensor blk.28.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   6
[258/291] Writing tensor blk.28.ffn_norm.weight                 | size   4096           | type F32  | T+   6
[259/291] Writing tensor blk.28.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   6
[260/291] Writing tensor blk.28.attn_output.weight              | size   4096 x   4096  | type F16  | T+   6
[261/291] Writing tensor blk.28.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   6
[262/291] Writing tensor blk.28.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   6
[263/291] Writing tensor blk.29.attn_norm.weight                | size   4096           | type F32  | T+   6
[264/291] Writing tensor blk.29.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   6
[265/291] Writing tensor blk.29.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   6
[266/291] Writing tensor blk.29.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   6
[267/291] Writing tensor blk.29.ffn_norm.weight                 | size   4096           | type F32  | T+   6
[268/291] Writing tensor blk.29.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   6
[269/291] Writing tensor blk.29.attn_output.weight              | size   4096 x   4096  | type F16  | T+   6
[270/291] Writing tensor blk.29.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   6
[271/291] Writing tensor blk.29.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   6
[272/291] Writing tensor blk.30.attn_norm.weight                | size   4096           | type F32  | T+   6
[273/291] Writing tensor blk.30.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   7
[274/291] Writing tensor blk.30.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   7
[275/291] Writing tensor blk.30.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   7
[276/291] Writing tensor blk.30.ffn_norm.weight                 | size   4096           | type F32  | T+   7
[277/291] Writing tensor blk.30.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   7
[278/291] Writing tensor blk.30.attn_output.weight              | size   4096 x   4096  | type F16  | T+   7
[279/291] Writing tensor blk.30.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   7
[280/291] Writing tensor blk.30.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   7
[281/291] Writing tensor blk.31.ffn_gate.weight                 | size  14336 x   4096  | type F16  | T+   7
[282/291] Writing tensor blk.31.ffn_up.weight                   | size  14336 x   4096  | type F16  | T+   7
[283/291] Writing tensor blk.31.attn_k.weight                   | size   1024 x   4096  | type F16  | T+   7
[284/291] Writing tensor blk.31.attn_output.weight              | size   4096 x   4096  | type F16  | T+   7
[285/291] Writing tensor blk.31.attn_q.weight                   | size   4096 x   4096  | type F16  | T+   7
[286/291] Writing tensor blk.31.attn_v.weight                   | size   1024 x   4096  | type F16  | T+   7
[287/291] Writing tensor output.weight                          | size 128256 x   4096  | type F16  | T+   8
[288/291] Writing tensor blk.31.attn_norm.weight                | size   4096           | type F32  | T+   8
[289/291] Writing tensor blk.31.ffn_down.weight                 | size   4096 x  14336  | type F16  | T+   8
[290/291] Writing tensor blk.31.ffn_norm.weight                 | size   4096           | type F32  | T+   8
[291/291] Writing tensor output_norm.weight                     | size   4096           | type F32  | T+   8
Wrote models/8B-v3-instruct/ggml-model-f16.gguf
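
Before quantizing, the freshly written f16 GGUF can be sanity-checked against the tokenizer metadata that the quantize dump below also reports (`tokenizer.ggml.model`, `tokenizer.ggml.bos_token_id`, `tokenizer.ggml.eos_token_id`). A minimal sketch, assuming the gguf-py package from this repo is installed and that `GGUFReader` exposes a `fields` mapping of key name to `ReaderField` as in `gguf/gguf_reader.py`; the file path is the one written above, and the key names are the ones shown in the metadata dump:

```python
# Minimal sketch: inspect tokenizer metadata of the converted GGUF before quantizing.
# Assumes gguf-py (from this repo) is installed and that GGUFReader exposes a
# `fields` mapping of key name -> ReaderField as in gguf/gguf_reader.py.
from gguf import GGUFReader

reader = GGUFReader("models/8B-v3-instruct/ggml-model-f16.gguf")

for key in ("tokenizer.ggml.model",
            "tokenizer.ggml.bos_token_id",
            "tokenizer.ggml.eos_token_id"):
    field = reader.fields[key]
    # parts[data[0]] holds the raw payload array of a scalar or string field:
    # string keys print as a list of byte values, integer keys as a one-element list.
    part = field.parts[field.data[0]]
    print(key, part.tolist())
```

If the reported `eos_token_id` does not match what the chat template expects, that would show up here before any time is spent on quantization.
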
main: build = 2778 (3ea0d360)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.4.0
main: quantizing 'models/8B-v3-instruct/ggml-model-f16.gguf' to 'models/8B-v3-instruct/ggml-model-Q4_K_M.gguf' as Q4_K_M
llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from models/8B-v3-instruct/ggml-model-f16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = models
llama_model_loader: - kv   2:                           llama.vocab_size u32              = 128256
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   7:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   8:                 llama.attention.head_count u32              = 32
llama_model_loader: - kv   9:              llama.attention.head_count_kv u32              = 8
llama_model_loader: - kv  10:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010
llama_model_loader: - kv  11:                       llama.rope.freq_base f32              = 500000.000000
llama_model_loader: - kv  12:                          general.file_type u32              = 1
llama_model_loader: - kv  13:                       tokenizer.ggml.model str              = gpt2
llama_model_loader: - kv  14:                      tokenizer.ggml.tokens arr[str,128256]  = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv  15:                      tokenizer.ggml.scores arr[f32,128256]  = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv  16:                  tokenizer.ggml.token_type arr[i32,128256]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv  17:                      tokenizer.ggml.merges arr[str,280147]  = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
llama_model_loader: - kv  18:                tokenizer.ggml.bos_token_id u32              = 128000
llama_model_loader: - kv  19:                tokenizer.ggml.eos_token_id u32              = 128001
llama_model_loader: - kv  20:                    tokenizer.chat_template str              = {% set loop_messages = messages %}{% ...
llama_model_loader: - type  f32:   65 tensors
llama_model_loader: - type  f16:  226 tensors
[   1/ 291]                    token_embd.weight - [ 4096, 128256,     1,     1], type =    f16, converting to q4_K .. size =  1002.00 MiB ->   281.81 MiB
[   2/ 291]               blk.0.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[   3/ 291]                blk.0.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[   4/ 291]                blk.0.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[   5/ 291]                  blk.0.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[   6/ 291]                blk.0.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[   7/ 291]                  blk.0.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[   8/ 291]             blk.0.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[   9/ 291]                  blk.0.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  10/ 291]                  blk.0.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[  11/ 291]               blk.1.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  12/ 291]                blk.1.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[  13/ 291]                blk.1.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  14/ 291]                  blk.1.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  15/ 291]                blk.1.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  16/ 291]                  blk.1.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  17/ 291]             blk.1.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  18/ 291]                  blk.1.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  19/ 291]                  blk.1.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[  20/ 291]               blk.2.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  21/ 291]                blk.2.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[  22/ 291]                blk.2.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  23/ 291]                  blk.2.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  24/ 291]                blk.2.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  25/ 291]                  blk.2.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  26/ 291]             blk.2.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  27/ 291]                  blk.2.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  28/ 291]                  blk.2.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[  29/ 291]               blk.3.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  30/ 291]                blk.3.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[  31/ 291]                blk.3.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  32/ 291]                  blk.3.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  33/ 291]                blk.3.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  34/ 291]                  blk.3.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  35/ 291]             blk.3.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  36/ 291]                  blk.3.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  37/ 291]                  blk.3.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[  38/ 291]               blk.4.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  39/ 291]                blk.4.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  40/ 291]                blk.4.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  41/ 291]                  blk.4.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  42/ 291]                blk.4.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  43/ 291]                  blk.4.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  44/ 291]             blk.4.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  45/ 291]                  blk.4.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  46/ 291]                  blk.4.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  47/ 291]               blk.5.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  48/ 291]                blk.5.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  49/ 291]                blk.5.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  50/ 291]                  blk.5.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  51/ 291]                blk.5.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  52/ 291]                  blk.5.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  53/ 291]             blk.5.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  54/ 291]                  blk.5.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  55/ 291]                  blk.5.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  56/ 291]               blk.6.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  57/ 291]                blk.6.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[  58/ 291]                blk.6.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  59/ 291]                  blk.6.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  60/ 291]                blk.6.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  61/ 291]                  blk.6.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  62/ 291]             blk.6.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  63/ 291]                  blk.6.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  64/ 291]                  blk.6.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[  65/ 291]               blk.7.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  66/ 291]                blk.7.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  67/ 291]                blk.7.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  68/ 291]                  blk.7.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  69/ 291]                blk.7.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  70/ 291]                  blk.7.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  71/ 291]             blk.7.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  72/ 291]                  blk.7.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  73/ 291]                  blk.7.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  74/ 291]               blk.8.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  75/ 291]                blk.8.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  76/ 291]                blk.8.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  77/ 291]                  blk.8.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  78/ 291]                blk.8.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  79/ 291]                  blk.8.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  80/ 291]             blk.8.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  81/ 291]                  blk.8.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  82/ 291]                  blk.8.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  83/ 291]              blk.10.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  84/ 291]               blk.10.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[  85/ 291]               blk.10.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  86/ 291]                 blk.10.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  87/ 291]               blk.10.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  88/ 291]                 blk.10.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  89/ 291]            blk.10.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  90/ 291]                 blk.10.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  91/ 291]                 blk.10.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[  92/ 291]              blk.11.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  93/ 291]               blk.11.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  94/ 291]               blk.11.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  95/ 291]                 blk.11.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[  96/ 291]               blk.11.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[  97/ 291]                 blk.11.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[  98/ 291]            blk.11.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[  99/ 291]                 blk.11.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 100/ 291]                 blk.11.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 101/ 291]              blk.12.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 102/ 291]               blk.12.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 103/ 291]               blk.12.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 104/ 291]                 blk.12.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 105/ 291]               blk.12.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 106/ 291]                 blk.12.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 107/ 291]            blk.12.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 108/ 291]                 blk.12.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 109/ 291]                 blk.12.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 110/ 291]              blk.13.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 111/ 291]               blk.13.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 112/ 291]               blk.13.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 113/ 291]                 blk.13.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 114/ 291]               blk.13.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 115/ 291]                 blk.13.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 116/ 291]            blk.13.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 117/ 291]                 blk.13.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 118/ 291]                 blk.13.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 119/ 291]              blk.14.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 120/ 291]               blk.14.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 121/ 291]               blk.14.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 122/ 291]                 blk.14.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 123/ 291]               blk.14.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 124/ 291]                 blk.14.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 125/ 291]            blk.14.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 126/ 291]                 blk.14.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 127/ 291]                 blk.14.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 128/ 291]              blk.15.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 129/ 291]               blk.15.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 130/ 291]               blk.15.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 131/ 291]                 blk.15.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 132/ 291]               blk.15.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 133/ 291]                 blk.15.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 134/ 291]            blk.15.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 135/ 291]                 blk.15.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 136/ 291]                 blk.15.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 137/ 291]              blk.16.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 138/ 291]               blk.16.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 139/ 291]               blk.16.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 140/ 291]                 blk.16.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 141/ 291]               blk.16.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 142/ 291]                 blk.16.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 143/ 291]            blk.16.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 144/ 291]                 blk.16.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 145/ 291]                 blk.16.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 146/ 291]              blk.17.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 147/ 291]               blk.17.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 148/ 291]               blk.17.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 149/ 291]                 blk.17.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 150/ 291]               blk.17.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 151/ 291]                 blk.17.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 152/ 291]            blk.17.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 153/ 291]                 blk.17.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 154/ 291]                 blk.17.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 155/ 291]              blk.18.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 156/ 291]               blk.18.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 157/ 291]               blk.18.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 158/ 291]                 blk.18.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 159/ 291]               blk.18.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 160/ 291]                 blk.18.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 161/ 291]            blk.18.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 162/ 291]                 blk.18.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 163/ 291]                 blk.18.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 164/ 291]              blk.19.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 165/ 291]               blk.19.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 166/ 291]               blk.19.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 167/ 291]                 blk.19.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 168/ 291]               blk.19.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 169/ 291]                 blk.19.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 170/ 291]            blk.19.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 171/ 291]                 blk.19.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 172/ 291]                 blk.19.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 173/ 291]               blk.20.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 174/ 291]                 blk.20.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 175/ 291]            blk.20.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 176/ 291]                 blk.20.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 177/ 291]                 blk.20.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 178/ 291]               blk.9.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 179/ 291]                blk.9.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 180/ 291]                blk.9.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 181/ 291]                  blk.9.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 182/ 291]                blk.9.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 183/ 291]                  blk.9.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 184/ 291]             blk.9.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 185/ 291]                  blk.9.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 186/ 291]                  blk.9.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 187/ 291]              blk.20.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 188/ 291]               blk.20.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 189/ 291]                 blk.20.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 190/ 291]               blk.20.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 191/ 291]              blk.21.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 192/ 291]               blk.21.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 193/ 291]               blk.21.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 194/ 291]                 blk.21.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 195/ 291]               blk.21.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 196/ 291]                 blk.21.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 197/ 291]            blk.21.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 198/ 291]                 blk.21.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 199/ 291]                 blk.21.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 200/ 291]              blk.22.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 201/ 291]               blk.22.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 202/ 291]               blk.22.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 203/ 291]                 blk.22.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 204/ 291]               blk.22.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 205/ 291]                 blk.22.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 206/ 291]            blk.22.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 207/ 291]                 blk.22.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 208/ 291]                 blk.22.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 209/ 291]              blk.23.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 210/ 291]               blk.23.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 211/ 291]               blk.23.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 212/ 291]                 blk.23.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 213/ 291]               blk.23.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 214/ 291]                 blk.23.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 215/ 291]            blk.23.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 216/ 291]                 blk.23.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 217/ 291]                 blk.23.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 218/ 291]              blk.24.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 219/ 291]               blk.24.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 220/ 291]               blk.24.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 221/ 291]                 blk.24.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 222/ 291]               blk.24.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 223/ 291]                 blk.24.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 224/ 291]            blk.24.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 225/ 291]                 blk.24.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 226/ 291]                 blk.24.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 227/ 291]              blk.25.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 228/ 291]               blk.25.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 229/ 291]               blk.25.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 230/ 291]                 blk.25.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 231/ 291]               blk.25.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 232/ 291]                 blk.25.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 233/ 291]            blk.25.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 234/ 291]                 blk.25.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 235/ 291]                 blk.25.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 236/ 291]              blk.26.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 237/ 291]               blk.26.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 238/ 291]               blk.26.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 239/ 291]                 blk.26.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 240/ 291]               blk.26.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 241/ 291]                 blk.26.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 242/ 291]            blk.26.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 243/ 291]                 blk.26.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 244/ 291]                 blk.26.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 245/ 291]              blk.27.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 246/ 291]               blk.27.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 247/ 291]               blk.27.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 248/ 291]                 blk.27.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 249/ 291]               blk.27.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 250/ 291]                 blk.27.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 251/ 291]            blk.27.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 252/ 291]                 blk.27.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 253/ 291]                 blk.27.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 254/ 291]              blk.28.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 255/ 291]               blk.28.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 256/ 291]               blk.28.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 257/ 291]                 blk.28.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 258/ 291]               blk.28.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 259/ 291]                 blk.28.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 260/ 291]            blk.28.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 261/ 291]                 blk.28.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 262/ 291]                 blk.28.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 263/ 291]              blk.29.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 264/ 291]               blk.29.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 265/ 291]               blk.29.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 266/ 291]                 blk.29.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 267/ 291]               blk.29.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 268/ 291]                 blk.29.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 269/ 291]            blk.29.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 270/ 291]                 blk.29.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 271/ 291]                 blk.29.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 272/ 291]              blk.30.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 273/ 291]               blk.30.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 274/ 291]               blk.30.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 275/ 291]                 blk.30.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 276/ 291]               blk.30.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 277/ 291]                 blk.30.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 278/ 291]            blk.30.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 279/ 291]                 blk.30.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 280/ 291]                 blk.30.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 281/ 291]               blk.31.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 282/ 291]                 blk.31.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB
[ 283/ 291]                 blk.31.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB
[ 284/ 291]            blk.31.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 285/ 291]                 blk.31.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB
[ 286/ 291]                 blk.31.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB
[ 287/ 291]                        output.weight - [ 4096, 128256,     1,     1], type =    f16, converting to q6_K .. size =  1002.00 MiB ->   410.98 MiB
[ 288/ 291]              blk.31.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 289/ 291]               blk.31.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB
[ 290/ 291]               blk.31.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
[ 291/ 291]                   output_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB
llama_model_quantize_internal: model size  = 15317.02 MB
llama_model_quantize_internal: quant size  =  4685.30 MB

main: quantize time = 24238.23 ms
main:    total time = 24238.24 ms
# start inference on a gguf model
!./main -ngl 33 -m .models/8B-v3-instruct/ggml-model-Q4_K_M.gguf.gguf -n 128

Log start
main: build = 2778 (3ea0d360)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.4.0
main: seed  = 1714611377
llama_model_load: error loading model: llama_model_loader: failed to load model from .models/8B-v3-instruct/ggml-model-Q4_K_M.gguf.gguf

llama_load_model_from_file: failed to load model
llama_init_from_gpt_params: error: failed to load model '.models/8B-v3-instruct/ggml-model-Q4_K_M.gguf.gguf'
main: error: unable to load model

@abasu0713
Copy link


@XiongjieDai are you using the right file name? The quantized model that gets written ends with only a single .gguf. Hope this helps

@XiongjieDai
Copy link


@XiongjieDai are you using the right file name? The quantized model that gets written ends with only a single .gguf. Hope this helps

Oh man, sorry about the typo. No, it is still not working.

# start inference on a gguf model
!./main -ngl 33 -m .models/8B-v3-instruct/ggml-model-Q4_K_M.gguf -n 128

Log start
main: build = 2778 (3ea0d360)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.4.0
main: seed  = 1714613628
llama_model_load: error loading model: llama_model_loader: failed to load model from .models/8B-v3-instruct/ggml-model-Q4_K_M.gguf

llama_load_model_from_file: failed to load model
llama_init_from_gpt_params: error: failed to load model '.models/8B-v3-instruct/ggml-model-Q4_K_M.gguf'
main: error: unable to load model

@teleprint-me
Copy link
Contributor

teleprint-me commented May 2, 2024

@abasu0713 @XiongjieDai is using the wrong script.

python3 convert.py

Should use HF script instead.

python convert-hf-to-gguf.py

Use the HF model too, not the one distributed from meta.

https://huggingface.co/meta-llama/Meta-Llama-3-8B

It will work afterwards.
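For reference, a minimal end-to-end sketch (directory and file names are just examples; this assumes the huggingface_hub CLI is installed and the model license has been accepted on the Hub):

huggingface-cli download meta-llama/Meta-Llama-3-8B --local-dir ./Meta-Llama-3-8B
python convert-hf-to-gguf.py ./Meta-Llama-3-8B/ --outfile ./Meta-Llama-3-8B/ggml-model-f16.gguf --outtype f16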

@XiongjieDai
Copy link

XiongjieDai commented May 2, 2024

Sorry for bothering you guys. It was just a missing slash in the path... Thank you for your prompt reply!

@teleprint-me
Copy link
Contributor

Trust me, you're not alone 😅. I don't know how many times I've been stymied by a '/' or a '\'.

@oldgithubman
Copy link

@abasu0713 @XiongjieDai is using the wrong script.

python3 convert.py

Should use HF script instead.

python convert-hf-to-gguf.py

Use the HF model too, not the one distributed from meta.

https://huggingface.co/meta-llama/Meta-Llama-3-8B

It will work afterwards.

The documentation really needs to be better. How many resources are being wasted because the documentation is telling people to use convert.py? I know it just cost me about a day. Why isn't there just one interface anyway? Very confusing. I've been making my own quants for months now and I still don't know which one to use when. There should be only one interface and the documentation should be up-to-date and accurate. Crazy ideas, I know

@LostRuins
Copy link
Collaborator

Actually I've been wondering, what's the purpose of the convert.py script? If the hf one does everything needed, should convert.py be removed?

@oldgithubman
Copy link

Actually I've been wondering, what's the purpose of the convert.py script? If the hf one does everything needed, should convert.py be removed?

Well, according to the main readme, that's the only one you should even know about and use. I feel like I'm taking crazy pills

@dranger003
Copy link
Contributor

cc @dranger003 - I really appreciated your ppl chart visual + measured ppl gap table for different quantization types for CommandR+. Do you think you would be willing to recreate those comparisons on L3 70b (base or Instruct, preferably base?) Thanks

Thanks, sorry for the late response, just saw this one.
I'll see if I can find some time to do it.

@teleprint-me
Copy link
Contributor

teleprint-me commented May 2, 2024

The purpose of the convert.py script is to partially load tensors dynamically as the conversion occurs. This reduces memory usage during the conversion process.

Normally, the entire model's weights are loaded, which is very RAM intensive. That's why it exists. It should be easy to understand why this is valuable to have.
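As a rough illustration of that idea (just a sketch of lazy loading using the safetensors API, not the actual convert.py implementation; the file name is a placeholder):

from safetensors import safe_open

def iter_tensors(path):
    # safe_open maps the file and materializes tensors one at a time,
    # so peak RAM stays close to the size of a single tensor.
    with safe_open(path, framework="numpy") as f:
        for name in f.keys():
            yield name, f.get_tensor(name)

for name, tensor in iter_tensors("model-00001-of-00004.safetensors"):
    print(name, tensor.shape, tensor.dtype)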

@bartowski1182
Copy link
Contributor

The purpose of the convert.py script is to partially load tensors dynamically as the conversion occurs. This reduces memory usage during the conversion process.

Normally, the entire model's weights are loaded, which is very RAM intensive. That's why it exists. It should be easy to understand why this is valuable to have.

Is this true...? I need the full amount of RAM to load models when using convert-hf-to-gguf, several hundred GB for the biggest ones.

@teleprint-me
Copy link
Contributor

teleprint-me commented May 2, 2024

It depends on the model. Load a raw torch model (like a 7B one) and watch as it begins to consume about 40 GB of RAM. That's a lot of RAM just to load the model!

I think clarifying the script's name would probably help with the confusion. Perhaps convert-torch.py would be more appropriate.

@bartowski1182
Copy link
Contributor

I load raw 7B models with only like 20 GB of VRAM :S Are you loading in FP32?

@teleprint-me
Copy link
Contributor

Well, the other option would be bfloat, or half, right? Quants weren't as popular and as widely available when I originally tested it. This was about 2 years ago... wow, time flies. 💀
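Back-of-the-envelope numbers for the raw weights alone (a rough sketch; actual usage adds framework and activation overhead on top):

params = 7e9  # ~7B parameters
for dtype, bytes_per_param in [("fp32", 4), ("fp16/bf16", 2)]:
    print(f"{dtype}: {params * bytes_per_param / 2**30:.0f} GiB")
# prints roughly: fp32: 26 GiB, fp16/bf16: 13 GiB

So ~40 GB observed for a 7B model is consistent with fp32 weights plus overhead, while fp16/bf16 lands closer to the ~14-20 GB figures people report.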

@oldgithubman
Copy link

I think clarifying the script's name would probably help with the confusion. Perhaps convert-torch.py would be more appropriate.

A step in the right direction. Why not merge the scripts into a unified convert.py?

@dranger003
Copy link
Contributor

@kalomaze Here it is. This uses a 400-chunk imatrix on wiki.train for the quants below Q6_K.

| Quantization | Size (GiB) | Perplexity (wiki.test) | Delta (FP16) |
|---|---|---|---|
| IQ1_S | 14.29 | 9.8655 +/- 0.0625 | 248.51% |
| IQ1_M | 15.60 | 8.5193 +/- 0.0530 | 200.95% |
| IQ2_XXS | 17.79 | 6.6705 +/- 0.0405 | 135.64% |
| IQ2_XS | 19.69 | 5.7486 +/- 0.0334 | 103.07% |
| IQ2_S | 20.71 | 5.5215 +/- 0.0318 | 95.05% |
| Q2_K_S | 22.79 | 5.4334 +/- 0.0325 | 91.94% |
| IQ2_M | 22.46 | 4.8959 +/- 0.0276 | 72.95% |
| Q2_K | 24.56 | 4.7763 +/- 0.0274 | 68.73% |
| IQ3_XXS | 25.58 | 3.9671 +/- 0.0211 | 40.14% |
| IQ3_XS | 27.29 | 3.7210 +/- 0.0191 | 31.45% |
| Q3_K_S | 28.79 | 3.6502 +/- 0.0192 | 28.95% |
| IQ3_S | 28.79 | 3.4698 +/- 0.0174 | 22.57% |
| IQ3_M | 29.74 | 3.4402 +/- 0.0171 | 21.53% |
| Q3_K_M | 31.91 | 3.3617 +/- 0.0172 | 18.75% |
| Q3_K_L | 34.59 | 3.3016 +/- 0.0168 | 16.63% |
| IQ4_XS | 35.30 | 3.0310 +/- 0.0149 | 7.07% |
| IQ4_NL | 37.30 | 3.0261 +/- 0.0149 | 6.90% |
| Q4_K_S | 37.58 | 3.0050 +/- 0.0148 | 6.15% |
| Q4_K_M | 39.60 | 2.9674 +/- 0.0146 | 4.83% |
| Q5_K_S | 45.32 | 2.8843 +/- 0.0141 | 1.89% |
| Q5_K_M | 46.52 | 2.8656 +/- 0.0139 | 1.23% |
| Q6_K | 53.91 | 2.8441 +/- 0.0138 | 0.47% |
| Q8_0 | 69.83 | 2.8316 +/- 0.0138 | 0.03% |
| F16 | 131.43 | 2.8308 +/- 0.0138 | 0.00% |

[chart: ggml-meta-llama-3-70b-ppl — perplexity vs. size for the Llama 3 70B quants above]
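For anyone wanting to reproduce something similar, the commands look roughly like this (file names are placeholders; 400 chunks and IQ2_XS are just the settings and quant type discussed above):

./imatrix -m ggml-model-f16.gguf -f wiki.train.raw -o imatrix.dat --chunks 400
./quantize --imatrix imatrix.dat ggml-model-f16.gguf ggml-model-IQ2_XS.gguf IQ2_XS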

@mapleroyal
Copy link

Does anyone have convert instructions that work - I'm trying both Meta and HF models using this PR and none of the convert scripts work:

$ python3.11 convert.py ~/Data/llama3/Meta-Llama-3-8B/ --outfile ./models/llama-8b-v3/ggml-model-f16.gguf --outtype f16 --vocab-type bpe

FileNotFoundError: Could not find a tokenizer matching any of ['bpe']

I see a few other people reporting the same problems. Those who succeeded - what were the necessary changes?

Did anyone find a solution to this?

python convert.py /Users/user/ai_models/Meta-Llama-3-70B-Instruct --vocab-type bpe
produces
FileNotFoundError: Could not find a tokenizer matching any of ['bpe']

@dranger003
Copy link
Contributor

@mapleroyal You should use convert-hf-to-gguf.py instead.

python convert-hf-to-gguf.py ./Meta-Llama-3-8B/ --outfile ggml-model-f16.gguf --outtype f16
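If the f16 conversion succeeds, a follow-up quantization step would look roughly like this (Q4_K_M is just an example type):

./quantize ggml-model-f16.gguf ggml-model-Q4_K_M.gguf Q4_K_M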

@mapleroyal
Copy link

@dranger003

@mapleroyal You should use convert-hf-to-gguf.py instead.

python convert-hf-to-gguf.py ./Meta-Llama-3-8B/ --outfile ggml-model-f16.gguf --outtype f16

Even though I'm using the original meta (i.e. non-hf) model?

@dranger003
Copy link
Contributor

By original you mean the .pth? I don't think either of the convert scripts supports converting the .pth weights, but it should work fine on the safetensors from Meta on HF.
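If someone really does only have the original .pth weights, one possible route is to first convert them to HF format with transformers' conversion script and then run convert-hf-to-gguf.py. Treat this as a hedged sketch; the paths are placeholders and the exact flags and supported sizes depend on your transformers version:

python -m transformers.models.llama.convert_llama_weights_to_hf --input_dir ./Meta-Llama-3-8B-original --model_size 8B --output_dir ./Meta-Llama-3-8B-hf --llama_version 3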

@mapleroyal
Copy link

By original you mean the .pth? I don't think either of the convert scripts supports converting the .pth weights, but it should work fine on the safetensors from Meta on HF.

Yes, exactly. Ok, got it. Thank you.
