This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Develop #442

Closed · wants to merge 63 commits
Commits (63)
7a10cfb
add bert model
oppiliappan Jul 31, 2023
ffb0519
refactor: move ggml format to module
philpax Aug 20, 2023
d5c2562
fix(ggml): use byte-arrays for magic
philpax Aug 20, 2023
dd7aa26
feat(ggml): impl unwired gguf loader
philpax Aug 20, 2023
b9b1391
Remove error on context window overflow
Andreybest Aug 23, 2023
e166b7c
feat(gguf): gguf-v2 support
philpax Aug 27, 2023
90c6797
chore(gguf): clippy fix
philpax Aug 27, 2023
ddf4e40
Merge branch 'main' into gguf
philpax Aug 27, 2023
38dd730
fix(gguf): drop the null terminator
philpax Aug 27, 2023
41462ed
refactor(ggml): begin decoupling the old formats
philpax Aug 27, 2023
2de2df7
feat(bin): add gguf-explorer as debugging tool
philpax Aug 27, 2023
0da661f
refactor(gguf): split metadata out + use macros
philpax Aug 28, 2023
e182444
wip: rewrite loader to use GGUF; almost wire up llama
philpax Aug 28, 2023
178a0fb
fix(cli): use info log level
philpax Aug 28, 2023
2a9417a
wip: successfully load a llama2 gguf*
philpax Aug 28, 2023
823828d
wip: disable everything that's broken
philpax Aug 28, 2023
eb8c508
feat(llama): validate tensor data layout
philpax Sep 6, 2023
99a9fb4
Add "context swap" functions to session and add "decoded_tokens" to s…
Andreybest Sep 12, 2023
6835335
Build against newer GGML version
LLukas22 Sep 16, 2023
1eb0d79
Update llama-cpp
LLukas22 Sep 16, 2023
ad136e1
Include `ggml-alloc.c` during build
LLukas22 Sep 16, 2023
fd3ff64
Merge remote-tracking branch 'upstream/main' into feat/ggml-update
LLukas22 Sep 22, 2023
ab381c7
Hopefully fix linux build
LLukas22 Sep 22, 2023
4ebb16e
Remove Scratch Buffers
LLukas22 Sep 23, 2023
995dd79
Use `GraphAllocator` in LLaMA architecture
LLukas22 Sep 23, 2023
6ba5126
Working graph allocator for `llama`
LLukas22 Sep 24, 2023
78b0e25
Scope `input_length` and `session_len` to `BuildContext`
LLukas22 Sep 26, 2023
8ad589b
Logging + `mpt` tests
LLukas22 Sep 30, 2023
e506b0b
Try to set the cuda scratch offset
LLukas22 Sep 30, 2023
f398ebd
feat(llm): remove architecture param
philpax Oct 8, 2023
588eb98
feat(ggml): use newtype for metadata
philpax Oct 8, 2023
388fa87
feat(llm): first pass at tokenizer re-port
philpax Oct 8, 2023
f827517
fix(llm): embedded tokenizer decode
philpax Oct 22, 2023
58cb8cc
wip(llm): reconvert ggml tokenizer with GPT-4
philpax Oct 23, 2023
43ebc3d
Merge branch 'main' into gguf
philpax Oct 23, 2023
e4db5b9
fix(ggml/llmb): use IndexMap for GGUF
philpax Oct 23, 2023
8996061
fix(llmb): disable embedded tokenizer
philpax Oct 29, 2023
34379ac
refactor: move loading logic back into llmb, simplify
philpax Oct 29, 2023
a4bbdbf
feat: implement GGUF write / llm gguf rebuild
philpax Oct 29, 2023
df1aa0e
feat(llm): implement gguf add-hf-tokenizer
philpax Oct 29, 2023
d5e7b61
fix(gguf): add support for ggufv3
philpax Oct 30, 2023
5457414
fix(gguf): load bools correctly
philpax Oct 30, 2023
6114076
feat(llm): get GPT-NeoX loading again
philpax Oct 30, 2023
8961ff7
feat(llm): get GPT-NeoX closer to working
philpax Oct 30, 2023
be709ed
feat(llm): more attempted GPT-NeoX fixes
philpax Oct 30, 2023
a728852
fix(cli): in info, elide known large items
philpax Oct 30, 2023
5ed38be
fix(llmb): usercallback show error
philpax Oct 31, 2023
fcbfb4d
fix(ggml): bindgen issues
philpax Nov 1, 2023
52c2bb6
Merge pull request #398 from nerdypepper/add-bert-arch
philpax Nov 12, 2023
5e4b35f
Merge branch 'develop' into feat/ggml-update
philpax Nov 12, 2023
e5e0fe1
Merge pull request #428 from LLukas22/feat/ggml-update
philpax Nov 12, 2023
2e3c6f7
Merge branch 'develop' into andreybest-main
philpax Nov 12, 2023
2c127df
Merge pull request #424 from Andreybest/main
philpax Nov 12, 2023
eddf953
chore: fix precommit
philpax Nov 12, 2023
ab956c9
Merge in develop
philpax Nov 12, 2023
7c3d1cf
chore: fix precommit
philpax Nov 12, 2023
4401631
Merge branch 'develop' into gguf
philpax Nov 12, 2023
535eda1
Merge pull request #412 from rustformers/gguf
philpax Nov 12, 2023
d318da1
Merge branch 'main' into develop
philpax Nov 29, 2023
bd63f63
wip: update ggml crate only
philpax Nov 19, 2023
ce927dd
Start porting llama.cpp + simple.cpp to Rust
philpax Nov 19, 2023
ab85bf6
feat(lcsc): start implementing llama functions
philpax Nov 19, 2023
00aaf4a
chore: update llama-cpp
philpax Nov 29, 2023
1,754 changes: 1,732 additions & 22 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Cargo.toml
@@ -5,6 +5,7 @@ members = [
"crates/ggml/sys",
"crates/llm",
"crates/llm-base",
"crates/llama-cpp-simple-clone",
"crates/models/*",
"binaries/*",
]
@@ -33,6 +34,7 @@ memmap2 = "0.5.10"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tracing = { version = "0.1", features = ["log"] }
llm-samplers = "=0.0.7"
indexmap = "2.0.2"

# Config for 'cargo dist'
[workspace.metadata.dist]
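
The new indexmap dependency lines up with the "fix(ggml/llmb): use IndexMap for GGUF" commit above: GGUF metadata keys are ordered in the file, so an insertion-order-preserving map keeps that order across a read/rewrite round trip. A minimal sketch of the idea (illustrative keys and plain string values, not the crate's actual loader types):

use indexmap::IndexMap;

fn main() {
    // Unlike HashMap, IndexMap iterates in insertion order, so metadata
    // written back out keeps the layout it was read with.
    let mut metadata: IndexMap<String, String> = IndexMap::new();
    metadata.insert("general.architecture".into(), "llama".into());
    metadata.insert("general.name".into(), "example".into());
    metadata.insert("llama.context_length".into(), "4096".into());

    for (key, value) in &metadata {
        println!("{key} = {value}");
    }
}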
11 changes: 9 additions & 2 deletions binaries/generate-ggml-bindings/src/main.rs
@@ -25,8 +25,12 @@ fn generate_main(ggml_path: &Path, src_path: &Path) {
let bindings = bindgen::Builder::default()
.header(ggml_path.join("ggml.h").to_str().unwrap().to_string())
.allowlist_file(r".*ggml.h")
.header(ggml_path.join("k_quants.h").to_string_lossy())
.allowlist_file(r".*k_quants.h")
.header(ggml_path.join("ggml-quants.h").to_string_lossy())
.allowlist_file(r".*ggml-quants.h")
.header(ggml_path.join("ggml-alloc.h").to_string_lossy())
.allowlist_file(r".*ggml-alloc.h")
.header(ggml_path.join("ggml-backend.h").to_string_lossy())
.allowlist_file(r".*ggml-backend.h")
// Suppress some warnings
.raw_line("#![allow(non_upper_case_globals)]")
.raw_line("#![allow(non_camel_case_types)]")
@@ -88,6 +92,9 @@ fn generate_metal(ggml_path: &Path, src_path: &Path) {
generate_extra("metal", ggml_path, src_path, |b| {
b.header(ggml_path.join("ggml-metal.h").to_string_lossy())
.allowlist_file(r".*ggml-metal\.h")
.raw_line("use super::ggml_cgraph;")
.raw_line("use super::ggml_log_callback;")
.raw_line("use super::ggml_tensor;")
});
}

18 changes: 18 additions & 0 deletions binaries/gguf-explorer/Cargo.toml
@@ -0,0 +1,18 @@
[package]
name = "gguf-explorer"
version = "0.1.0"
edition = "2021"
repository = { workspace = true }
license = { workspace = true }
publish = false

[package.metadata.release]
release = false

[dependencies]
ggml = { path = "../../crates/ggml" }

anyhow = { workspace = true }

eframe = "0.22"
egui_extras = "0.22"
220 changes: 220 additions & 0 deletions binaries/gguf-explorer/src/main.rs
@@ -0,0 +1,220 @@
use std::{fmt::Display, fs::File, io::BufReader};

use egui_extras::{Column, TableBuilder};
use ggml::format::gguf::{self, Gguf};

use eframe::egui::{self, Button, CentralPanel, CollapsingHeader, Label, RichText, TopBottomPanel};

fn main() -> eframe::Result<()> {
let file_path = match std::env::args().nth(1) {
Some(path) => path,
None => {
eprintln!("Usage: gguf-explorer <path-to-gguf-file>");
std::process::exit(1);
}
};

let mut file = File::open(file_path).expect("Failed to open file");
let gguf = Gguf::load(&mut BufReader::new(&mut file)).expect("Failed to load gguf file");

let native_options = eframe::NativeOptions::default();
eframe::run_native(
"GGUF Explorer",
native_options,
Box::new(move |_cc| {
Box::new(Explorer {
_file: file,
gguf,

selected_tab: Tab::Metadata,
tensor_sort_order: TensorColumn::Offset,
})
}),
)
}

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub enum Tab {
Metadata,
Tensors,
}
impl Display for Tab {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Tab::Metadata => write!(f, "Metadata"),
Tab::Tensors => write!(f, "Tensors"),
}
}
}
impl Tab {
const ALL: [Tab; 2] = [Tab::Metadata, Tab::Tensors];
}

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub enum TensorColumn {
Name,
Dimensions,
Type,
Offset,
}
impl Display for TensorColumn {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TensorColumn::Name => write!(f, "Name"),
TensorColumn::Dimensions => write!(f, "Dimensions"),
TensorColumn::Type => write!(f, "Type"),
TensorColumn::Offset => write!(f, "Offset"),
}
}
}
impl TensorColumn {
const ALL: [Self; 4] = [Self::Name, Self::Dimensions, Self::Type, Self::Offset];
}

struct Explorer {
_file: File,
gguf: Gguf,

selected_tab: Tab,
tensor_sort_order: TensorColumn,
}
impl eframe::App for Explorer {
fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) {
TopBottomPanel::top("top_panel").show(ctx, |ui| {
ui.horizontal(|ui| {
for tab in Tab::ALL.iter().copied() {
let text = RichText::from(tab.to_string());
let text = if tab == self.selected_tab {
text.underline()
} else {
text
};

if ui.add(Button::new(text)).clicked() {
self.selected_tab = tab;
}
}
});
});

CentralPanel::default().show(ctx, |ui| match self.selected_tab {
Tab::Metadata => {
self.render_metadata(ui);
}
Tab::Tensors => {
self.render_tensors(ui);
}
});
}
}
impl Explorer {
fn render_metadata(&mut self, ui: &mut egui::Ui) {
let metadata = &self.gguf.metadata;
let mut metadata_keys = metadata.keys().collect::<Vec<_>>();
metadata_keys.sort_by_key(|k| *k);

TableBuilder::new(ui)
.striped(true)
.auto_shrink([false, true])
.column(Column::auto().resizable(true))
.column(Column::remainder().resizable(true))
.header(20.0, |mut header| {
header.col(|ui| {
ui.label("Key");
});
header.col(|ui| {
ui.label("Value");
});
})
.body(|mut body| {
for key in metadata_keys {
let value = metadata.get_optional(key).unwrap();

body.row(30.0, |mut row| {
row.col(|ui| {
ui.add(Label::new(monospace(key)).wrap(false));
});
row.col(|ui| match value {
gguf::MetadataValue::Array(value) => {
CollapsingHeader::new(format!("array ({} elements)", value.len()))
.id_source(key)
.show(ui, |ui| {
ui.add(
Label::new(monospace(format!("{:?}", value)))
.wrap(false),
);
});
}
value => {
ui.add(Label::new(monospace(format!("{:?}", value))).wrap(false));
}
});
});
}
});
}

fn render_tensors(&mut self, ui: &mut egui::Ui) {
let tensors = &self.gguf.tensor_infos;
let mut tensor_names = tensors.keys().collect::<Vec<_>>();
match self.tensor_sort_order {
TensorColumn::Name => tensor_names.sort_by_key(|k| *k),
TensorColumn::Dimensions => {
tensor_names.sort_by_key(|k| tensors[*k].dimensions.clone())
}
TensorColumn::Type => tensor_names.sort_by_key(|k| tensors[*k].element_type),
TensorColumn::Offset => tensor_names.sort_by_key(|k| tensors[*k].offset),
}

TableBuilder::new(ui)
.striped(true)
.auto_shrink([false, true])
.column(Column::remainder().resizable(true))
.columns(Column::auto().resizable(true), 3)
.header(20.0, |mut header| {
for column in TensorColumn::ALL.iter().copied() {
header.col(|ui| {
let text = RichText::from(column.to_string());
let text = if self.tensor_sort_order == column {
text.underline()
} else {
text
};

if ui.add(Button::new(text).wrap(false)).clicked() {
self.tensor_sort_order = column;
}
});
}
})
.body(|mut body| {
for tensor_name in tensor_names {
let tensor = &tensors[tensor_name];

body.row(30.0, |mut row| {
row.col(|ui| {
ui.add(Label::new(monospace(tensor_name)).wrap(false));
});
row.col(|ui| {
ui.add(
Label::new(monospace(format!("{:?}", tensor.dimensions)))
.wrap(false),
);
});
row.col(|ui| {
ui.add(
Label::new(monospace(tensor.element_type.to_string())).wrap(false),
);
});
row.col(|ui| {
ui.add(Label::new(monospace(tensor.offset.to_string())).wrap(false));
});
});
}
});
}
}

fn monospace(text: impl Into<String>) -> RichText {
RichText::new(text).monospace()
}
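
Assuming the workspace layout added in this PR, the explorer can be tried from the repository root with something like:

cargo run --release --bin gguf-explorer -- path/to/model.gguf

which opens the Metadata and Tensors tabs shown above for the given file.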