Update chat to handle partial messages from streaming replies.
jmont-dev committed Aug 15, 2024
1 parent d93a7b3 commit 56a0b6b
Showing 2 changed files with 28 additions and 8 deletions.
include/ollama.hpp (14 additions, 4 deletions)
@@ -495,13 +495,23 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message, ollama::message_type::chat);
-            if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-            on_receive_token(response);
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response, ollama::message_type::chat);
+                partial_responses->clear();
+
+                if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+                on_receive_token(response);
+            }
+            catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
 
             return true;
         };
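The change buffers each raw chunk from the HTTP stream and only builds an ollama::response (and invokes the user callback) once the accumulated text parses as complete JSON; a chunk that ends mid-object stays buffered until the next chunk arrives. Below is a minimal sketch of the same pattern outside the library, assuming nlohmann/json (which ollama-hpp bundles) and the /api/chat chunk shape with a message.content field; the function and variable names are illustrative only, not part of the library.

#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

// Illustrative only: accumulate streamed chunks until they form valid JSON,
// then hand the complete object to the consumer and reset the buffer.
bool handle_stream_chunk(const char* data, size_t data_length, std::string& buffer)
{
    buffer.append(data, data_length);      // may end mid-way through a JSON object
    try
    {
        nlohmann::json reply = nlohmann::json::parse(buffer);
        buffer.clear();                    // complete object parsed; start fresh
        std::cout << reply["message"]["content"].get<std::string>() << std::flush;
    }
    catch (const nlohmann::json::parse_error&)
    {
        // Partial object; keep the buffer and wait for the next chunk.
    }
    return true;                           // keep the stream open
}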
singleheader/ollama.hpp (14 additions, 4 deletions)
@@ -35285,13 +35285,23 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message, ollama::message_type::chat);
-            if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-            on_receive_token(response);
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response, ollama::message_type::chat);
+                partial_responses->clear();
+
+                if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+                on_receive_token(response);
+            }
+            catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
 
             return true;
         };
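From the caller's side the streaming API is unchanged; the callback can now simply rely on every ollama::response being built from a complete JSON object. A hedged usage sketch follows, assuming a streaming chat overload of the form ollama::chat(model, message, callback) taking a void(const ollama::response&) callback, as shown in the project README; the exact signatures and the model name llama3:8b are assumptions, not confirmed by this commit.

#include "ollama.hpp"
#include <iostream>

int main()
{
    // Called once per complete reply object, even if the server split it
    // across several HTTP chunks.
    auto on_receive_token = [](const ollama::response& response)
    {
        std::cout << response << std::flush;
    };

    ollama::message message("user", "Why is the sky blue?");
    ollama::chat("llama3:8b", message, on_receive_token);
}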
