diff --git a/include/ollama.hpp b/include/ollama.hpp
index d3a3e15..2cd8e73 100644
--- a/include/ollama.hpp
+++ b/include/ollama.hpp
@@ -495,13 +495,23 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message, ollama::message_type::chat);
-            if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-            on_receive_token(response);
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response, ollama::message_type::chat);
+                partial_responses->clear();
+
+                if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+                on_receive_token(response);
+            }
+            catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
 
             return true;
         };
diff --git a/singleheader/ollama.hpp b/singleheader/ollama.hpp
index 750ac9e..3f68add 100644
--- a/singleheader/ollama.hpp
+++ b/singleheader/ollama.hpp
@@ -35285,13 +35285,23 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message, ollama::message_type::chat);
-            if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-            on_receive_token(response);
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response, ollama::message_type::chat);
+                partial_responses->clear();
+
+                if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+                on_receive_token(response);
+            }
+            catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
 
             return true;
         };
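
Reviewer note, not part of the patch: the change buffers streamed reply fragments because a single chunk handed to the stream callback is not guaranteed to contain a complete JSON object. Fragments are concatenated until parsing succeeds, then the buffer is cleared; an incomplete fragment surfaces as ollama::invalid_json_exception, which the callback swallows so the next chunk can complete the object. One thing worth double-checking is that std::accumulate lives in <numeric>, so the header must already include it (directly or transitively) for this hunk to compile.

Below is a minimal standalone sketch of the same buffering idea, independent of ollama-hpp. It assumes nlohmann::json (which ollama-hpp bundles) and swaps the try/catch completeness test for the non-throwing nlohmann::json::accept(); handle_chunk and the simulated chunks are illustrative only, not part of the library API.

    #include <iostream>
    #include <numeric>
    #include <string>
    #include <vector>
    #include <nlohmann/json.hpp>

    // Buffers incoming fragments until they form one complete JSON object,
    // mirroring the accumulate-then-parse logic in the patch above.
    // Returns true once a complete object has been parsed and handled.
    bool handle_chunk(std::vector<std::string>& partial_responses, const std::string& chunk)
    {
        partial_responses.push_back(chunk);
        std::string total_response = std::accumulate(partial_responses.begin(), partial_responses.end(), std::string(""));

        // The patch detects incompleteness by catching ollama::invalid_json_exception;
        // here we use nlohmann::json::accept() instead, which validates without throwing.
        if (!nlohmann::json::accept(total_response)) return false; // still partial: keep buffering

        nlohmann::json response = nlohmann::json::parse(total_response);
        partial_responses.clear(); // complete object consumed; start a fresh buffer
        std::cout << response.value("response", "") << std::flush;
        return true;
    }

    int main()
    {
        // Simulate one server reply arriving split across two socket reads.
        std::vector<std::string> partial_responses;
        handle_chunk(partial_responses, R"({"model":"llama3","resp)");    // incomplete: buffered
        handle_chunk(partial_responses, R"(onse":"Hello","done":true})"); // completes the object
        return 0;
    }

The try/catch form used in the patch has the advantage of reusing ollama::response's own parser as the completeness test, at the cost of relying on an exception for control flow on every partial fragment.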