a61c8bc3bf
* Add Gemma3nVisionModel - MobileNetV5 vision encoder convertor to convert_hf_to_gguf.py. Add gemma3n to vision projectors in gguf-py/gguf/constants.py. * Add mobilenetv5 impl * Fix comments, remove unused vars * Fix permute and remove transpose of projection weights * Fix comments, remove debugging prints from hf_to_gguf * 1. Hard-code image_mean = 0 and image_std = 1 2. Use available tensor mapping logic 3. Remove redundant chat template replacement of soft tokens placeholder with media placeholder * 1. Move mobilenetv5 helpers declarations to `clip_graph_mobilenetv5` struct and definitions to mobilenetv5.cpp 2.Remove unused `clip_is_gemma3n` func declarations and definitions 3. Remove redundant `rescale_image_u8_to_f32` func and use `normalize_image_u8_to_f32` with zero mean and unit std 4. Calculate n_patches using image_size / patch_size * Remove obsolete comments * - convert_hf_to_gguf.py & constants.py & tensor_mapping.py: Use explicit mapping: Custom map for double indexed blocks and tensor_mapping.py for rest - convert_hf_to_gguf.py: Unsqueeze Stem Bias and Layer scale tensors to correct shape while converting to gguf - mobilenetv5.cpp: Remove explicit reshaping of Stem Bias and Layer scale which are now handled while converting to gguf, replace fprintf with LOG_* - clip.cpp: Remove unused embedding and hard_emb_norm tensor loading * - Rename tensors to v.conv..., v.blk..., v.msfa... to better align with already existing terminology * Fix stem conv bias name * Remove explicit handling of bias term for stem conv * - Change order of addition in "project_per_layer_inputs" to support broadcasting of vision inp_per_layer - Simplify the vision embeddings path of "get_per_layer_inputs" to output [n_embd_altup, n_layer, 1], broadcastable * clean up conversion script * fix code style * also preserve audio tensors * trailing space * split arch A and V * rm unused gemma3 func * fix alignment --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
96 lines
2.8 KiB
CMake
96 lines
2.8 KiB
CMake
# mtmd — multimodal support library (vision/audio encoders + CLIP graph impls)

find_package(Threads REQUIRED)

add_library(mtmd)

# Explicit source list (no globbing) so additions show up in diffs.
target_sources(mtmd PRIVATE
    mtmd.cpp
    mtmd-audio.cpp
    mtmd.h
    mtmd-helper.cpp
    mtmd-helper.h
    clip.cpp
    clip.h
    clip-impl.h
    clip-model.h
    clip-graph.h
    models/models.h
    models/cogvlm.cpp
    models/conformer.cpp
    models/glm4v.cpp
    models/internvl.cpp
    models/kimivl.cpp
    models/llama4.cpp
    models/llava.cpp
    models/minicpmv.cpp
    models/pixtral.cpp
    models/qwen2vl.cpp
    models/qwen3vl.cpp
    models/siglip.cpp
    models/whisper-enc.cpp
    models/mobilenetv5.cpp
    models/youtuvl.cpp
)
|
|
|
|
# Library versioning; MACHO_CURRENT_VERSION is pinned to 0 to keep the macOS
# linker from seeing an oversized version number.
set_target_properties(mtmd PROPERTIES
    VERSION               ${LLAMA_INSTALL_VERSION}
    SOVERSION             0
    MACHO_CURRENT_VERSION 0
)

# ggml/llama appear in the public interface; threads are an implementation
# detail of mtmd itself.
target_link_libraries(mtmd
    PUBLIC  ggml llama
    PRIVATE Threads::Threads
)

target_include_directories(mtmd
    PUBLIC  .
    PRIVATE ../..              # parent project headers — confirm against repo layout
    PRIVATE ../../vendor       # vendored headers (stb_image.h, miniaudio.h)
)

target_compile_features(mtmd PRIVATE cxx_std_17)
|
|
|
|
# Shared-library build: PIC is required, LLAMA_BUILD is defined only while
# compiling mtmd, and LLAMA_SHARED propagates to all consumers.
if (BUILD_SHARED_LIBS)
    set_target_properties(mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_compile_definitions(mtmd
        PRIVATE LLAMA_BUILD
        PUBLIC  LLAMA_SHARED
    )
endif()
|
|
|
|
# Headers installed alongside the library.
set(MTMD_PUBLIC_HEADERS
    ${CMAKE_CURRENT_SOURCE_DIR}/mtmd.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mtmd-helper.h
)

set_target_properties(mtmd PROPERTIES PUBLIC_HEADER "${MTMD_PUBLIC_HEADERS}")

install(TARGETS mtmd LIBRARY PUBLIC_HEADER)
|
|
|
|
# The vendored stb_image.h / miniaudio.h trip -Wcast-qual on non-MSVC
# compilers; suppress it here rather than patching third-party code.
if (NOT MSVC)
    target_compile_options(mtmd PRIVATE -Wno-cast-qual)
endif()
|
|
|
|
# Make sure build metadata is generated before mtmd is built.
# NOTE(review): removed the former `add_dependencies(mtmd-helper BUILD_INFO)` —
# no `mtmd-helper` target is ever created in this file (mtmd-helper.cpp is
# compiled directly into the `mtmd` library), so that call failed at configure
# time whenever BUILD_INFO existed.
if (TARGET BUILD_INFO)
    add_dependencies(mtmd BUILD_INFO)
endif()
|
|
|
|
# Guard: mtmd is meant to be a public library, so linking it against the
# internal `common` library is a hard configure-time error.
if (TARGET mtmd)
    get_target_property(mtmd_link_libs mtmd LINK_LIBRARIES)
    if (mtmd_link_libs AND "common" IN_LIST mtmd_link_libs)
        message(FATAL_ERROR "mtmd is designed to be a public library.\n"
                            "It must not link against common")
    endif()
endif()
|
|
|
|
# Retired CLI names are kept as stub executables built from
# deprecation-warning.cpp (presumably prints a deprecation notice — confirm
# in that file).
foreach(deprecated_cli IN ITEMS
        llama-llava-cli
        llama-gemma3-cli
        llama-minicpmv-cli
        llama-qwen2vl-cli)
    add_executable(${deprecated_cli} deprecation-warning.cpp)
endforeach()
|
|
|
# llama-mtmd-cli — the multimodal command-line tool.
set(TARGET llama-mtmd-cli)
add_executable(${TARGET} mtmd-cli.cpp)
# NOTE(review): dropped the redundant set_target_properties(... OUTPUT_NAME
# llama-mtmd-cli) — OUTPUT_NAME defaults to the target name, so the produced
# binary is unchanged.
if (LLAMA_TOOLS_INSTALL)  # fixed `if(` -> `if (` for consistency with the rest of the file
    install(TARGETS ${TARGET} RUNTIME)
endif()
# `common` is allowed here (the CLI is a tool, not part of the public library).
target_link_libraries(${TARGET} PRIVATE common mtmd Threads::Threads)
target_compile_features(${TARGET} PRIVATE cxx_std_17)