Files
llama.cpp/tools/server/server-http.h
T
tha80 983ca8992e server: (router) Forward form-data to model server (Fixes #22044) (#22118)
* This commit enables the router to forward form-data to the model server.
Fixes #22044 (enables use of /v1/audio/transcriptions in router mode)

* Applied the suggestion from Copilot's first comment: use the non-throwing json::parse overload.
* Addressed Copilot's third comment by extending the files representation to also include filename and content-type
* Addressed Copilot's fourth comment by making the RNG thread_local

* Changed variable body from std::string to std::ostringstream in build_multipart_body
as suggested by ngxson in https://github.com/ggml-org/llama.cpp/pull/22118#discussion_r3127099053

* Added sanitize_field lambda in build_multipart_body for key, filename and content_type
as suggested by ngxson in https://github.com/ggml-org/llama.cpp/pull/22118#discussion_r3127104647

* explicitly checking if value/item is string before calling value/item.get<std::string>()
as requested by ngxson in https://github.com/ggml-org/llama.cpp/pull/22118#discussion_r3127111279

* Added double quote to the sanitize lambda and throw on json parse failure

---------

Co-authored-by: Ralph Paßgang <ralph@trust-it.de>
2026-04-27 23:55:00 +02:00

90 lines
2.6 KiB
C++

#pragma once
#include <atomic>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <thread>
#include <vector>
// forward declaration: this header only refers to the type as `const common_params &`
struct common_params;
// Generator-style HTTP response object.
// A response is delivered in one of two modes:
//   1) normal:    `data` holds the complete response body
//   2) streaming: every call to next(output) produces the next chunk;
//                 once next(output) returns false, nothing follows the current chunk
// Note: a chunk may be empty, in which case no data is sent for that chunk.
struct server_http_res {
    std::string content_type = "application/json; charset=utf-8";
    int         status       = 200;
    std::string data;
    std::map<std::string, std::string> headers;

    // TODO: move this to a virtual function once we have proper polymorphism support
    std::function<bool(std::string &)> next = nullptr;

    // the response is a stream exactly when a chunk generator has been installed
    bool is_stream() const {
        return static_cast<bool>(next);
    }

    virtual ~server_http_res() = default;
};
// owning pointer to a response object; this is what a request handler returns
// used by set_chunked_content_provider:
// httplib requires the stream provider to be stored on the heap
using server_http_res_ptr = std::unique_ptr<server_http_res>;
// plain byte buffer; used for the contents of an uploaded file (uploaded_file::data)
using raw_buffer = std::vector<uint8_t>;
// a single uploaded file (form data), as stored in server_http_req::files
struct uploaded_file {
// raw file contents
raw_buffer data;
// file name attached to the upload
std::string filename;
// content type attached to the upload
std::string content_type;
};
// Incoming HTTP request, as handed to a request handler.
struct server_http_req {
    std::map<std::string, std::string> params;   // path_params + query_params
    std::map<std::string, std::string> headers;  // used by MCP proxy
    std::string path;
    std::string query_string;                    // query parameters string (e.g. "action=save")
    std::string body;
    std::map<std::string, uploaded_file> files;  // used for file uploads (form data)

    // held by reference: the callable is not owned by this struct
    const std::function<bool()> & should_stop;

    // Look up `key` in `params`.
    // Returns a copy of the stored value, or `def` when the key is absent.
    std::string get_param(const std::string & key, const std::string & def = "") const {
        const auto found = params.find(key);
        return found == params.end() ? def : found->second;
    }
};
// HTTP server context: holds the server thread, the hidden implementation (pimpl)
// and the address settings, and exposes get()/post() to associate a handler with a path.
struct server_http_context {
    // implementation type is only declared here; it is defined outside this header
    class Impl;
    std::unique_ptr<Impl> pimpl;

    std::thread       thread; // server thread
    std::atomic<bool> is_ready = false;

    std::string path_prefix;
    std::string hostname;
    // given a default so the value is never indeterminate if it is read before being assigned
    int port = 0;

    // both are defined out of line (required because Impl is incomplete here)
    server_http_context();
    ~server_http_context();

    bool init(const common_params & params);
    bool start();
    void stop() const;

    // note: the handler should never throw exceptions
    using handler_t = std::function<server_http_res_ptr(const server_http_req & req)>;

    // associate `handler` with `path` (presumably for the HTTP method of the same name)
    void get(const std::string & path, const handler_t & handler) const;
    void post(const std::string & path, const handler_t & handler) const;

    // for debugging
    std::string listening_address;
};