common/parser: add --skip-chat-parsing to force a pure content parser. (#20289)
* Add `--force-pure-content` to force a pure content parser.

* Update common/arg.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Change parameter name [no ci]

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
committed by
GitHub
parent
054d8b0f24
commit
d2ecd2d1cf
@@ -3115,6 +3115,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
params.chat_template = read_file(value);
|
params.chat_template = read_file(value);
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
|
).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
|
||||||
|
add_opt(common_arg(
|
||||||
|
{"--skip-chat-parsing"},
|
||||||
|
{"--no-skip-chat-parsing"},
|
||||||
|
string_format(
|
||||||
|
"force a pure content parser, even if a Jinja template is specified; model will output everything "
|
||||||
|
"in the content section, including any reasoning and/or tool calls (default: disabled)"
|
||||||
|
),
|
||||||
|
[](common_params & params, bool value) {
|
||||||
|
params.force_pure_content_parser = value;
|
||||||
|
}
|
||||||
|
).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SKIP_CHAT_PARSING"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--prefill-assistant"},
|
{"--prefill-assistant"},
|
||||||
{"--no-prefill-assistant"},
|
{"--no-prefill-assistant"},
|
||||||
|
|||||||
@@ -1562,6 +1562,21 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (inputs.force_pure_content) {
|
||||||
|
LOG_WRN("Forcing pure content template, will not render reasoning or tools separately.");
|
||||||
|
// Create the result structure
|
||||||
|
common_chat_params data;
|
||||||
|
auto params_copy = params;
|
||||||
|
params_copy.reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
||||||
|
data.prompt = common_chat_template_direct_apply(tmpl, params_copy);
|
||||||
|
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
||||||
|
auto parser = build_chat_peg_parser([](common_chat_peg_builder &p) {
|
||||||
|
return p.content(p.rest());
|
||||||
|
});
|
||||||
|
data.parser = parser.save();
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
// Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
|
// Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
|
||||||
// Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
|
// Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
|
||||||
if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
|
if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
|
||||||
|
|||||||
@@ -204,6 +204,7 @@ struct common_chat_templates_inputs {
|
|||||||
std::map<std::string, std::string> chat_template_kwargs;
|
std::map<std::string, std::string> chat_template_kwargs;
|
||||||
bool add_bos = false;
|
bool add_bos = false;
|
||||||
bool add_eos = false;
|
bool add_eos = false;
|
||||||
|
bool force_pure_content = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct common_chat_params {
|
struct common_chat_params {
|
||||||
|
|||||||
@@ -544,6 +544,7 @@ struct common_params {
|
|||||||
std::string chat_template = ""; // NOLINT
|
std::string chat_template = ""; // NOLINT
|
||||||
bool use_jinja = true; // NOLINT
|
bool use_jinja = true; // NOLINT
|
||||||
bool enable_chat_template = true;
|
bool enable_chat_template = true;
|
||||||
|
bool force_pure_content_parser = false;
|
||||||
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||||
int enable_reasoning = -1; // -1 = auto, 0 = disable, 1 = enable
|
int enable_reasoning = -1; // -1 = auto, 0 = disable, 1 = enable
|
||||||
int reasoning_budget = -1;
|
int reasoning_budget = -1;
|
||||||
|
|||||||
@@ -215,6 +215,7 @@ struct cli_context {
|
|||||||
inputs.parallel_tool_calls = false;
|
inputs.parallel_tool_calls = false;
|
||||||
inputs.add_generation_prompt = true;
|
inputs.add_generation_prompt = true;
|
||||||
inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||||
|
inputs.force_pure_content = chat_params.force_pure_content;
|
||||||
inputs.enable_thinking = chat_params.enable_thinking ? common_chat_templates_support_enable_thinking(chat_params.tmpls.get()) : false;
|
inputs.enable_thinking = chat_params.enable_thinking ? common_chat_templates_support_enable_thinking(chat_params.tmpls.get()) : false;
|
||||||
|
|
||||||
// Apply chat template to the list of messages
|
// Apply chat template to the list of messages
|
||||||
|
|||||||
@@ -308,6 +308,7 @@ int main(int argc, char ** argv) {
|
|||||||
inputs.use_jinja = g_params->use_jinja;
|
inputs.use_jinja = g_params->use_jinja;
|
||||||
inputs.messages = chat_msgs;
|
inputs.messages = chat_msgs;
|
||||||
inputs.add_generation_prompt = !params.prompt.empty();
|
inputs.add_generation_prompt = !params.prompt.empty();
|
||||||
|
inputs.force_pure_content = params.force_pure_content_parser;
|
||||||
|
|
||||||
prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
|
prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1065,6 +1065,7 @@ json oaicompat_chat_params_parse(
|
|||||||
|
|
||||||
inputs.add_generation_prompt = true;
|
inputs.add_generation_prompt = true;
|
||||||
}
|
}
|
||||||
|
inputs.force_pure_content = opt.force_pure_content;
|
||||||
|
|
||||||
// Apply chat template to the list of messages
|
// Apply chat template to the list of messages
|
||||||
auto chat_params = common_chat_templates_apply(opt.tmpls.get(), inputs);
|
auto chat_params = common_chat_templates_apply(opt.tmpls.get(), inputs);
|
||||||
|
|||||||
@@ -290,6 +290,7 @@ struct server_chat_params {
|
|||||||
int reasoning_budget = -1;
|
int reasoning_budget = -1;
|
||||||
std::string reasoning_budget_message;
|
std::string reasoning_budget_message;
|
||||||
std::string media_path;
|
std::string media_path;
|
||||||
|
bool force_pure_content = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
// used by /completions endpoint
|
// used by /completions endpoint
|
||||||
|
|||||||
@@ -911,6 +911,7 @@ private:
|
|||||||
/* reasoning_budget */ params_base.reasoning_budget,
|
/* reasoning_budget */ params_base.reasoning_budget,
|
||||||
/* reasoning_budget_msg */ params_base.reasoning_budget_message,
|
/* reasoning_budget_msg */ params_base.reasoning_budget_message,
|
||||||
/* media_path */ params_base.media_path,
|
/* media_path */ params_base.media_path,
|
||||||
|
/* force_pure_content */ params_base.force_pure_content_parser
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user