server: use random media marker (#21962)

* server: use random media marker

* nits

* remove legacy <__image__> token

* revert special char in random
This commit is contained in:
Xuan-Son Nguyen
2026-04-15 23:52:22 +02:00
committed by GitHub
parent b3d758750a
commit 408225bb1a
5 changed files with 17 additions and 11 deletions
+11 -3
View File
@@ -84,6 +84,14 @@ std::string gen_tool_call_id() {
return random_string();
}
// Returns the process-wide media marker string.
//
// The marker has the form "<__media_" + random_string() + "__>". It is built
// once, on the first call, and every later call returns the same string.
//
// The returned pointer refers to a function-local static std::string, so it
// stays valid (and its contents unchanged) until program exit. Callers must
// not free it.
const char * get_media_marker() {
    // A function-local static is initialized exactly once, even when the first
    // calls arrive concurrently from several threads. This replaces the previous
    // unsynchronized "if empty, then assign" on a mutable file-scope string.
    static const std::string media_marker = "<__media_" + random_string() + "__>";
    return media_marker.c_str();
}
//
// lora utils
//
@@ -975,7 +983,7 @@ json oaicompat_chat_params_parse(
handle_media(out_files, image_url, opt.media_path);
p["type"] = "media_marker";
p["text"] = mtmd_default_marker();
p["text"] = get_media_marker();
p.erase("image_url");
} else if (type == "input_audio") {
@@ -996,7 +1004,7 @@ json oaicompat_chat_params_parse(
// TODO: add audio_url support by reusing handle_media()
p["type"] = "media_marker";
p["text"] = mtmd_default_marker();
p["text"] = get_media_marker();
p.erase("input_audio");
} else if (type != "text") {
@@ -1460,7 +1468,7 @@ json convert_transcriptions_to_chatcmpl(
if (!language.empty()) {
prompt += string_format(" (language: %s)", language.c_str());
}
prompt += mtmd_default_marker();
prompt += get_media_marker();
json chatcmpl_body = inp_body; // copy all fields
chatcmpl_body["messages"] = json::array({
+3
View File
@@ -92,6 +92,9 @@ std::string random_string();
std::string gen_chatcmplid();
std::string gen_tool_call_id();
// get the media marker of this server process, in the form "<__media_" + random_string() + "__>"
// the marker is generated on the first call and the same string is returned by every later call
// note: each time the server restarts, the marker will be different
// the returned pointer refers to storage owned by the implementation; callers must not free it
const char * get_media_marker();
//
// lora utils
//
+1
View File
@@ -708,6 +708,7 @@ private:
mparams.warmup = params_base.warmup;
mparams.image_min_tokens = params_base.image_min_tokens;
mparams.image_max_tokens = params_base.image_max_tokens;
mparams.media_marker = get_media_marker();
mctx = mtmd_init_from_file(mmproj_path.c_str(), model, mparams);
if (mctx == nullptr) {