server : add thinking content blocks to Anthropic Messages API (#18551)
* server : add thinking content blocks to Anthropic Messages API Add support for returning reasoning/thinking content in Anthropic API responses when using models with --reasoning-format deepseek and the thinking parameter enabled. - Non-streaming: adds thinking block before text in content array - Streaming: emits thinking_delta events with correct block indices - Partial streaming: tracks reasoning state across chunks via anthropic_has_reasoning member variable Tested with bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF model. * server : fix Anthropic API streaming for thinking content blocks Add signature field and fix duplicate content_block_start events in Anthropic Messages API streaming responses for reasoning models. * server: refactor Anthropic streaming state to avoid raw pointer Replace raw pointer to task_result_state with direct field copies: - Copy state fields in update() before processing chunk - Use local copies in to_json_anthropic() instead of dereferencing - Pre-compute state updates for next chunk in update() This makes the data flow clearer and avoids unsafe pointer patterns.
This commit is contained in:
@@ -96,6 +96,10 @@ struct task_result_state {
|
||||
std::string generated_text; // append new chunks of generated text here
|
||||
std::vector<std::string> generated_tool_call_ids;
|
||||
|
||||
// for Anthropic API streaming: track content block state across chunks
|
||||
bool anthropic_thinking_block_started = false;
|
||||
bool anthropic_text_block_started = false;
|
||||
|
||||
task_result_state(const common_chat_syntax & oaicompat_chat_syntax)
|
||||
: oaicompat_chat_syntax(oaicompat_chat_syntax) {}
|
||||
|
||||
@@ -337,6 +341,12 @@ struct server_task_result_cmpl_partial : server_task_result {
|
||||
std::vector<common_chat_msg_diff> oaicompat_msg_diffs; // to be populated by update()
|
||||
bool is_updated = false;
|
||||
|
||||
// for Anthropic API: track if any reasoning content has been generated
|
||||
bool anthropic_has_reasoning = false;
|
||||
// Streaming state copied from task_result_state for this chunk
|
||||
bool anthropic_thinking_block_started = false;
|
||||
bool anthropic_text_block_started = false;
|
||||
|
||||
virtual bool is_stop() override {
|
||||
return false; // in stream mode, partial responses are not considered stop
|
||||
}
|
||||
@@ -346,6 +356,22 @@ struct server_task_result_cmpl_partial : server_task_result {
|
||||
virtual void update(task_result_state & state) override {
|
||||
is_updated = true;
|
||||
state.update_chat_msg(content, true, oaicompat_msg_diffs);
|
||||
// track if the accumulated message has any reasoning content
|
||||
anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
|
||||
|
||||
// Copy current state for use in to_json_anthropic() (reflects state BEFORE this chunk)
|
||||
anthropic_thinking_block_started = state.anthropic_thinking_block_started;
|
||||
anthropic_text_block_started = state.anthropic_text_block_started;
|
||||
|
||||
// Pre-compute state updates based on diffs (for next chunk)
|
||||
for (const auto & diff : oaicompat_msg_diffs) {
|
||||
if (!diff.reasoning_content_delta.empty() && !state.anthropic_thinking_block_started) {
|
||||
state.anthropic_thinking_block_started = true;
|
||||
}
|
||||
if (!diff.content_delta.empty() && !state.anthropic_text_block_started) {
|
||||
state.anthropic_text_block_started = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json to_json_non_oaicompat();
|
||||
|
||||
Reference in New Issue
Block a user