Files
llama.cpp/tools/mtmd/CMakeLists.txt
T
Simranjeet Singh a61c8bc3bf mtmd: Add Gemma3n multimodal support with MobileNetV5 vision encoder (#18256)
* Add Gemma3nVisionModel - MobileNetV5 vision encoder convertor to convert_hf_to_gguf.py. Add gemma3n to vision projectors in gguf-py/gguf/constants.py.

* Add mobilenetv5 impl

* Fix comments, remove unused vars

* Fix permute and remove transpose of projection weights

* Fix comments, remove debugging prints from hf_to_gguf

* 1. Hard-code image_mean = 0 and image_std = 1
2. Use available tensor mapping logic
3. Remove redundant chat template replacement of soft tokens placeholder with media placeholder

* 1. Move mobilenetv5 helpers declarations to `clip_graph_mobilenetv5` struct and definitions to mobilenetv5.cpp
2.Remove unused `clip_is_gemma3n` func declarations and definitions
3. Remove redundant `rescale_image_u8_to_f32` func and use `normalize_image_u8_to_f32` with zero mean and unit std
4. Calculate n_patches using image_size / patch_size

* Remove obsolete comments

* - convert_hf_to_gguf.py & constants.py & tensor_mapping.py: Use explicit mapping: Custom map for double indexed blocks and tensor_mapping.py for rest
- convert_hf_to_gguf.py: Unsqueeze Stem Bias and Layer scale tensors to correct shape while converting to gguf
- mobilenetv5.cpp: Remove explicit reshaping of Stem Bias and Layer scale which are now handled while converting to gguf, replace fprintf with LOG_*
- clip.cpp: Remove unused embedding and hard_emb_norm tensor loading

* - Rename tensors to v.conv..., v.blk..., v.msfa... to better align with already existing terminology

* Fix stem conv bias name

* Remove explicit handling of bias term for stem conv

* - Change order of addition in "project_per_layer_inputs" to support broadcasting of vision inp_per_layer
- Simplify the vision embeddings path of "get_per_layer_inputs" to output [n_embd_altup, n_layer, 1], broadcastable

* clean up conversion script

* fix code style

* also preserve audio tensors

* trailing space

* split arch A and V

* rm unused gemma3 func

* fix alignment

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
2026-01-09 23:42:38 +01:00

96 lines
2.8 KiB
CMake

# mtmd
find_package(Threads REQUIRED)
add_library(mtmd
mtmd.cpp
mtmd-audio.cpp
mtmd.h
mtmd-helper.cpp
mtmd-helper.h
clip.cpp
clip.h
clip-impl.h
clip-model.h
clip-graph.h
models/models.h
models/cogvlm.cpp
models/conformer.cpp
models/glm4v.cpp
models/internvl.cpp
models/kimivl.cpp
models/llama4.cpp
models/llava.cpp
models/minicpmv.cpp
models/pixtral.cpp
models/qwen2vl.cpp
models/qwen3vl.cpp
models/siglip.cpp
models/whisper-enc.cpp
models/mobilenetv5.cpp
models/youtuvl.cpp
)
set_target_properties(mtmd PROPERTIES
VERSION ${LLAMA_INSTALL_VERSION}
SOVERSION 0
MACHO_CURRENT_VERSION 0 # keep macOS linker from seeing oversized version number
)
target_link_libraries (mtmd PUBLIC ggml llama)
target_link_libraries (mtmd PRIVATE Threads::Threads)
target_include_directories(mtmd PUBLIC .)
target_include_directories(mtmd PRIVATE ../..)
target_include_directories(mtmd PRIVATE ../../vendor)
target_compile_features (mtmd PRIVATE cxx_std_17)
if (BUILD_SHARED_LIBS)
set_target_properties (mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(mtmd PRIVATE LLAMA_BUILD)
target_compile_definitions(mtmd PUBLIC LLAMA_SHARED)
endif()
set(MTMD_PUBLIC_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/mtmd.h
${CMAKE_CURRENT_SOURCE_DIR}/mtmd-helper.h
)
set_target_properties(mtmd
PROPERTIES
PUBLIC_HEADER "${MTMD_PUBLIC_HEADERS}")
install(TARGETS mtmd LIBRARY PUBLIC_HEADER)
if (NOT MSVC)
# for stb_image.h and miniaudio.h
target_compile_options(mtmd PRIVATE -Wno-cast-qual)
endif()
if (TARGET BUILD_INFO)
add_dependencies(mtmd BUILD_INFO)
add_dependencies(mtmd-helper BUILD_INFO)
endif()
# if mtmd is linked against common, we throw an error
if (TARGET mtmd)
get_target_property(libs mtmd LINK_LIBRARIES)
if (libs AND "common" IN_LIST libs)
message(FATAL_ERROR "mtmd is designed to be a public library.\n"
"It must not link against common")
endif()
endif()
add_executable(llama-llava-cli deprecation-warning.cpp)
add_executable(llama-gemma3-cli deprecation-warning.cpp)
add_executable(llama-minicpmv-cli deprecation-warning.cpp)
add_executable(llama-qwen2vl-cli deprecation-warning.cpp)
set(TARGET llama-mtmd-cli)
add_executable (${TARGET} mtmd-cli.cpp)
set_target_properties (${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli)
if(LLAMA_TOOLS_INSTALL)
install(TARGETS ${TARGET} RUNTIME)
endif()
target_link_libraries (${TARGET} PRIVATE common mtmd Threads::Threads)
target_compile_features(${TARGET} PRIVATE cxx_std_17)