mtmd: refactor mtmd_decode_use_mrope (#22161)
This commit is contained in:
+5
-15
@@ -131,6 +131,7 @@ struct mtmd_context {
|
|||||||
int n_threads;
|
int n_threads;
|
||||||
std::string media_marker;
|
std::string media_marker;
|
||||||
const int n_embd_text;
|
const int n_embd_text;
|
||||||
|
llama_rope_type decoder_rope;
|
||||||
|
|
||||||
// these are not tokens, but strings used to mark the beginning and end of image/audio embeddings
|
// these are not tokens, but strings used to mark the beginning and end of image/audio embeddings
|
||||||
std::string img_beg;
|
std::string img_beg;
|
||||||
@@ -167,7 +168,8 @@ struct mtmd_context {
|
|||||||
print_timings(ctx_params.print_timings),
|
print_timings(ctx_params.print_timings),
|
||||||
n_threads (ctx_params.n_threads),
|
n_threads (ctx_params.n_threads),
|
||||||
media_marker (ctx_params.media_marker),
|
media_marker (ctx_params.media_marker),
|
||||||
n_embd_text (llama_model_n_embd_inp(text_model))
|
n_embd_text (llama_model_n_embd_inp(text_model)),
|
||||||
|
decoder_rope (llama_model_rope_type(text_model))
|
||||||
{
|
{
|
||||||
if (ctx_params.image_marker != nullptr) {
|
if (ctx_params.image_marker != nullptr) {
|
||||||
throw std::runtime_error("custom image_marker is not supported anymore, use media_marker instead");
|
throw std::runtime_error("custom image_marker is not supported anymore, use media_marker instead");
|
||||||
@@ -1029,20 +1031,8 @@ bool mtmd_decode_use_non_causal(mtmd_context * ctx, const mtmd_input_chunk * chu
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool mtmd_decode_use_mrope(mtmd_context * ctx) {
|
bool mtmd_decode_use_mrope(mtmd_context * ctx) {
|
||||||
if (ctx->ctx_v == nullptr && ctx->proj_type_a() == PROJECTOR_TYPE_QWEN3A) {
|
return ctx->decoder_rope == LLAMA_ROPE_TYPE_MROPE
|
||||||
// qwen3-asr
|
|| ctx->decoder_rope == LLAMA_ROPE_TYPE_IMROPE;
|
||||||
return true;
|
|
||||||
}
|
|
||||||
switch (ctx->proj_type_v()) {
|
|
||||||
case PROJECTOR_TYPE_QWEN2VL:
|
|
||||||
case PROJECTOR_TYPE_QWEN25VL:
|
|
||||||
case PROJECTOR_TYPE_QWEN3VL:
|
|
||||||
case PROJECTOR_TYPE_GLM4V:
|
|
||||||
case PROJECTOR_TYPE_PADDLEOCR:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool mtmd_support_vision(mtmd_context * ctx) {
|
bool mtmd_support_vision(mtmd_context * ctx) {
|
||||||
|
|||||||
Reference in New Issue
Block a user