llama-bench: add -fitc and -fitt to arguments (#21304)

* llama-bench: add `-fitc` and `-fitt` to arguments

* update README.md

* address review comments

* update compare-llama-bench.py
This commit is contained in:
Aman Gupta
2026-04-06 22:26:02 +08:00
committed by GitHub
parent 4aa962e2b0
commit 94ca829b60
3 changed files with 107 additions and 6 deletions
+5 -2
View File
@@ -29,7 +29,8 @@ LLAMA_BENCH_DB_FIELDS = [
"cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers",
"split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "tensor_buft_overrides",
"use_mmap", "embeddings", "no_op_offload", "n_prompt", "n_gen", "n_depth",
"test_time", "avg_ns", "stddev_ns", "avg_ts", "stddev_ts", "n_cpu_moe"
"test_time", "avg_ns", "stddev_ns", "avg_ts", "stddev_ts", "n_cpu_moe",
"fit_target", "fit_min_ctx"
]
LLAMA_BENCH_DB_TYPES = [
@@ -39,6 +40,7 @@ LLAMA_BENCH_DB_TYPES = [
"TEXT", "INTEGER", "INTEGER", "INTEGER", "TEXT", "TEXT",
"INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER",
"TEXT", "INTEGER", "INTEGER", "REAL", "REAL", "INTEGER",
"INTEGER", "INTEGER"
]
# All test-backend-ops SQL fields
@@ -61,7 +63,8 @@ assert len(TEST_BACKEND_OPS_DB_FIELDS) == len(TEST_BACKEND_OPS_DB_TYPES)
LLAMA_BENCH_KEY_PROPERTIES = [
"cpu_info", "gpu_info", "backends", "n_gpu_layers", "n_cpu_moe", "tensor_buft_overrides", "model_filename", "model_type",
"n_batch", "n_ubatch", "embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v",
"use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen", "n_depth"
"use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen", "n_depth",
"fit_target", "fit_min_ctx"
]
# Properties by which to differentiate results per commit for test-backend-ops: