From 9d34231bb89590ee760ae19ba665e7855cd4fd4e Mon Sep 17 00:00:00 2001
From: ddh0 <chemist-mulches-39@icloud.com>
Date: Sat, 25 Apr 2026 01:25:35 -0500
Subject: [PATCH] llama-quant : default ftype param `Q5_1` --> `Q8_0` (#20828)

Change the default `ftype` in `llama_model_quantize_params` from
`LLAMA_FTYPE_MOSTLY_Q5_1` to `LLAMA_FTYPE_MOSTLY_Q8_0`.

If an external program uses the default quantization params without
setting `ftype` explicitly, it should get a known-good type like Q8_0
rather than Q5_1, which is rather old.
---
 src/llama-quant.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index f91d795b3..25a333b4a 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1283,7 +1283,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
 llama_model_quantize_params llama_model_quantize_default_params() {
     llama_model_quantize_params result = {
         /*.nthread                     =*/ 0,
-        /*.ftype                       =*/ LLAMA_FTYPE_MOSTLY_Q5_1,
+        /*.ftype                       =*/ LLAMA_FTYPE_MOSTLY_Q8_0,
         /*.output_tensor_type          =*/ GGML_TYPE_COUNT,
         /*.token_embedding_type        =*/ GGML_TYPE_COUNT,
         /*.allow_requantize            =*/ false,