Merge pull request #21 from jmont-dev/manual_requests
Support receiving partial responses over HTTP when streaming with the generate endpoint.
jmont-dev committed Aug 14, 2024
2 parents 8977537 + a97aa99 commit c69a52d
Showing 2 changed files with 32 additions and 8 deletions.
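The change buffers partial HTTP chunks inside the streaming callback and only invokes the user's token callback once the buffered data parses as a complete JSON response. For context, here is a minimal usage sketch of the streaming generate call this affects; the names used (ollama::generate taking a model, a prompt, and a token callback, and response::as_simple_string()) are assumed from the library's documented interface and should be checked against the README.

// Hypothetical usage sketch: assumes ollama::generate(model, prompt, callback)
// and ollama::response::as_simple_string(); verify both against the README.
#include "ollama.hpp"
#include <iostream>

int main()
{
    auto on_receive_token = [](const ollama::response& response)
    {
        // After this commit the callback fires only once the buffered HTTP
        // chunks form a complete JSON response, so parsing never sees a
        // truncated fragment.
        std::cout << response.as_simple_string() << std::flush;
    };

    ollama::generate("llama3:8b", "Why is the sky blue?", on_receive_token);
}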
20 changes: 16 additions & 4 deletions include/ollama.hpp
@@ -62,8 +62,10 @@
#include "Base64.h"

#include <string>
#include <memory>
#include <fstream>
#include <iostream>
#include <numeric>
#include <functional>
#include <exception>
#include <initializer_list>
@@ -421,13 +423,22 @@ class Ollama
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();

auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{

std::string message(data, data_length);
if (ollama::log_replies) std::cout << message << std::endl;
ollama::response response(message);
on_receive_token(response);

try
{
partial_responses->push_back(message);
std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
ollama::response response(total_response);
partial_responses->clear();
on_receive_token(response);
}
catch (...) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }

return true;
};

@@ -810,6 +821,7 @@ class Ollama
return true;
}


std::string server_url;
httplib::Client *cli;

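The heart of the change is in the second hunk above: each HTTP chunk is pushed onto a shared vector, the buffered chunks are concatenated and handed to the ollama::response constructor, and if that constructor throws (because the JSON is still incomplete) the exception is swallowed and the next chunk is awaited; on success the buffer is cleared and the user callback is invoked. Below is a standalone sketch of the same buffering idea, using nlohmann::json::parse in place of the ollama::response constructor as the parse step (an assumption made here so the sketch compiles on its own; the library bundles nlohmann json for this purpose).

// Standalone sketch of the chunk-buffering idea shown in the diff above.
// nlohmann::json stands in for ollama::response, whose constructor throws
// when handed an incomplete JSON fragment.
#include <nlohmann/json.hpp>
#include <cstddef>
#include <iostream>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

int main()
{
    auto partial_responses = std::make_shared<std::vector<std::string>>();

    auto stream_callback = [partial_responses](const char* data, std::size_t data_length) -> bool
    {
        std::string message(data, data_length);
        try
        {
            // Append this chunk and try to parse everything buffered so far.
            partial_responses->push_back(message);
            std::string total_response = std::accumulate(
                partial_responses->begin(), partial_responses->end(), std::string(""));
            nlohmann::json parsed = nlohmann::json::parse(total_response); // throws if incomplete
            partial_responses->clear();                                    // complete: reset buffer
            std::cout << parsed.value("response", "") << std::flush;
        }
        catch (...) { /* incomplete JSON: keep the buffer and wait for the next chunk */ }
        return true; // keep the HTTP stream open
    };

    // Simulate a response split across two HTTP chunks.
    std::string part1 = R"({"response":"Hel)";
    std::string part2 = R"(lo","done":true})";
    stream_callback(part1.data(), part1.size());
    stream_callback(part2.data(), part2.size());
}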
20 changes: 16 additions & 4 deletions singleheader/ollama.hpp
@@ -34852,8 +34852,10 @@ class Base64 {
*/

#include <string>
#include <memory>
#include <fstream>
#include <iostream>
#include <numeric>
#include <functional>
#include <exception>
#include <initializer_list>
@@ -35211,13 +35213,22 @@ class Ollama
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();

auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{

std::string message(data, data_length);
if (ollama::log_replies) std::cout << message << std::endl;
ollama::response response(message);
on_receive_token(response);

try
{
partial_responses->push_back(message);
std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
ollama::response response(total_response);
partial_responses->clear();
on_receive_token(response);
}
catch (...) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }

return true;
};

@@ -35600,6 +35611,7 @@ class Ollama
return true;
}


std::string server_url;
httplib::Client *cli;

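One design note: concatenating the whole vector with std::accumulate re-copies every buffered chunk on each callback invocation, which is quadratic in the worst case but harmless for the short bursts a streaming generate produces. A variation (not what this commit does) keeps a single growing std::string instead; a sketch under the same nlohmann::json assumption follows. Fed the same two-chunk simulation as above, it produces the same output.

// Variation on the same technique (not part of this commit): keep one growing
// std::string buffer instead of a vector re-joined with std::accumulate.
#include <nlohmann/json.hpp>
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>

auto make_buffering_callback()
{
    auto buffer = std::make_shared<std::string>();
    return [buffer](const char* data, std::size_t data_length) -> bool
    {
        buffer->append(data, data_length);
        try
        {
            nlohmann::json parsed = nlohmann::json::parse(*buffer); // throws until complete
            buffer->clear();
            std::cout << parsed.value("response", "") << std::flush;
        }
        catch (...) { /* incomplete JSON: wait for more data */ }
        return true;
    };
}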
