fit-params : refactor + add option to output estimated memory per device (#22171)

* fit-params : add option to output estimated memory per device
* cont : minor
* cont : refactor
* cont : move fit params implementation to libcommon
* cont : header
* cont : headers
* cont : codeowners
2026-04-21 09:54:36 +03:00
parent ff6b1062af
commit cfe9838d26
19 changed files with 1123 additions and 980 deletions
@@ -22,6 +22,7 @@
 #include "build-info.h"
 #include "common.h"
 #include "download.h"
+#include "fit.h"
 #include "ggml.h"
 #include "llama.h"

@@ -2225,7 +2226,7 @@ int main(int argc, char ** argv) {
                prev_inst = nullptr;
            }

-            // use default n_gpu_layers and n_ctx so llama_params_fit can adjust them
+            // use default n_gpu_layers and n_ctx so common_fit_params can adjust them
            mparams.n_gpu_layers          = llama_model_default_params().n_gpu_layers;
            mparams.tensor_split          = fit_tensor_split.data();
            mparams.tensor_buft_overrides = fit_overrides.data();
@@ -2236,7 +2237,7 @@ int main(int argc, char ** argv) {
            uint32_t n_ctx_needed = inst.n_prompt + inst.n_gen + inst.n_depth;
            cparams.n_ctx = std::max(cparams.n_ctx, n_ctx_needed);

-            llama_params_fit(inst.model.c_str(), &mparams, &cparams,
+            common_fit_params(inst.model.c_str(), &mparams, &cparams,
                fit_tensor_split.data(),
                fit_overrides.data(),
                margins.data(),