Skip to content

Commit

Permalink
tmp
Browse files (browse the repository at this point in the history)
  • Loading branch information
ggerganov committed Oct 29, 2023
1 parent b4ad03b commit 66a54bf
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion llama.cpp
Original file line number | Diff line number | Diff line change
@@ -3142,6 +3142,7 @@ static struct ggml_cgraph * llm_build_llama(
if (batch.token) {
struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
ggml_set_name(inp_tokens, "inp_tokens");
ggml_allocr_alloc(lctx.alloc, inp_tokens);

inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
} else {
@@ -3156,19 +3157,23 @@ static struct ggml_cgraph * llm_build_llama(
// KQ_scale
struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
ggml_set_name(KQ_scale, "KQ_scale");
ggml_allocr_alloc(lctx.alloc, KQ_scale);

// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_tokens, 1);
ggml_set_name(KQ_mask, "KQ_mask");
ggml_allocr_alloc(lctx.alloc, KQ_mask);

// KQ_pos - contains the positions
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
ggml_set_name(KQ_pos, "KQ_pos");
ggml_allocr_alloc(lctx.alloc, KQ_pos);

// shift the entire K-cache if needed
if (do_rope_shift) {
struct ggml_tensor * K_shift = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_ctx);
ggml_set_name(K_shift, "K_shift");
ggml_allocr_alloc(lctx.alloc, K_shift);

for (int il = 0; il < n_layer; ++il) {
struct ggml_tensor * tmp =
@@ -5523,7 +5528,7 @@ static struct ggml_cgraph * llama_build_graph(
}

// allocate memory and set the values for the input tensors of the graph
-llama_build_graph_input(lctx, batch, result);
+//llama_build_graph_input(lctx, batch, result);

//auto t_start = std::chrono::high_resolution_clock::now();

Expand Down

0 comments on commit 66a54bf

Please sign in to comment.