kv-cache: fix state restore with fragmented cache (#17982)
* kv-cache : fix state restore with fragmented cache (#17527)

  Change find_slot to allow non-contiguous allocation during state restore. Fixes the 'failed to find available cells in kv cache' error when restoring state to a fragmented cache.

* tests : update logic
* cleanup: tightened state_read_meta sig, added is_contiguous case
* fix: state_read_meta arg reorder loose ends

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@@ -222,6 +222,14 @@ llama_build_and_test(test-backend-ops.cpp)
|
||||
llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
|
||||
llama_build_and_test(test-autorelease.cpp LABEL "model")
|
||||
|
||||
# Test for state restore with a fragmented KV cache.
# Requires a model; uses the same argument pattern as test-thread-safety.
# s390x is given a different model file (the "-be" variant; presumably the
# big-endian build of the same model -- TODO confirm).
#
# CMAKE_SYSTEM_PROCESSOR is referenced by name instead of being expanded with
# ${...}: an unquoted expansion yields a malformed if() when the variable is
# empty and may be dereferenced a second time if its value names a variable.
if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
    llama_build_and_test(test-state-restore-fragmented.cpp LABEL "model" ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf)
else()
    llama_build_and_test(test-state-restore-fragmented.cpp LABEL "model" ARGS -hf ggml-org/models -hff tinyllamas/stories15M-be.Q4_0.gguf)
endif()
|
||||
|
||||
if (NOT GGML_BACKEND_DL)
|
||||
# these tests use the backends directly and cannot be built with dynamic loading
|
||||
llama_build_and_test(test-barrier.cpp)
|
||||
|
||||
Reference in New Issue
Block a user