fix: correct misspellings in code comments (#21217)
- emdeddings → embeddings (gemma3.cpp, gemma3n-iswa.cpp, gemma-embedding.cpp)
- imlpemented → implemented (llama-adapter.cpp)
- interere → interfere (llama-graph.cpp)
- overridde → overridden (chat.cpp)
- stastistics → statistics (ngram-map.h)
- layed → laid (llama-kv-cache.h)
- worster → worst (llama-context.cpp)
- sequantial → sequential (llama-batch.h)
+1
-1
@@ -221,7 +221,7 @@ using chat_template_caps = jinja::caps;
 struct common_chat_templates {
     bool add_bos;
     bool add_eos;
-    bool has_explicit_template; // Model had builtin template or template overridde was specified.
+    bool has_explicit_template; // Model had builtin template or template overridden was specified.
     std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
     std::unique_ptr<common_chat_template> template_tool_use;
 };

+1
-1
@@ -51,7 +51,7 @@ struct common_ngram_map_value {
 // statistics of a n-gram
 struct common_ngram_map_key {
     size_t key_idx;  // index of key n-gram in token-history
-    size_t stat_idx; // index of last token of stastistics computation (key_num, values)
+    size_t stat_idx; // index of last token of statistics computation (key_num, values)
 
     uint16_t key_num; // number of occurrences of this key n-gram in token-history
     common_ngram_map_value values[COMMON_NGRAM_MAX_VALUES]; // some known values after the key

@@ -294,7 +294,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
     }
 
     // get extra buffer types of the CPU
-    // TODO: a more general solution for non-CPU extra buft should be imlpemented in the future
+    // TODO: a more general solution for non-CPU extra buft should be implemented in the future
     // ref: https://github.com/ggml-org/llama.cpp/pull/12593#pullrequestreview-2718659948
     std::vector<ggml_backend_buffer_type_t> buft_extra;
     {

+1
-1
@@ -18,7 +18,7 @@ struct llama_ubatch {
     }
 
     // typical for M-RoPE cases:
-    //    0 - sequantial position of the tokens/embeddings in the sequence
+    //    0 - sequential position of the tokens/embeddings in the sequence
     //    1 - y position in the image
     //    2 - x position in the image
     //    3 - other

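Aside: the comment in this hunk describes the four position channels used for M-RoPE. A minimal illustrative sketch of filling such a position buffer for an image patch grid, assuming a flat [4][n_tokens] layout; the helper name make_mrope_pos and the parameters n_x and p0 are assumptions for illustration, not llama.cpp code:

    #include <cstdint>
    #include <vector>

    using llama_pos = int32_t; // local alias for this sketch; llama.h defines the real type

    // fill 4 position channels for n_tokens image-patch embeddings arranged in a grid
    // that is n_x patches wide, with the sequence starting at position p0
    std::vector<llama_pos> make_mrope_pos(int n_tokens, int n_x, llama_pos p0) {
        std::vector<llama_pos> pos(4 * n_tokens);
        for (int i = 0; i < n_tokens; ++i) {
            pos[0*n_tokens + i] = p0 + i;  // 0 - sequential position in the sequence
            pos[1*n_tokens + i] = i / n_x; // 1 - y position in the image
            pos[2*n_tokens + i] = i % n_x; // 2 - x position in the image
            pos[3*n_tokens + i] = 0;       // 3 - other (unused in this sketch)
        }
        return pos;
    }

How the sequential channel advances for image tokens is model-specific; the sketch only illustrates the channel layout named in the comment.
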
@@ -586,7 +586,7 @@ void llama_context::sched_reserve() {
 
     // reserve again with pp graph to avoid ggml-alloc reallocations during inference
     {
-        // TODO: not sure if the following graph would be worster case for multi-stream KV caches:
+        // TODO: not sure if the following graph would be worst case for multi-stream KV caches:
         //
         // auto * gf = graph_reserve(n_tokens, 1, n_tokens, mctx.get());
         //

+1
-1
@@ -1665,7 +1665,7 @@ ggml_tensor * llm_graph_context::build_inp_attn_scale() const {
 
 ggml_tensor * llm_graph_context::build_inp_out_ids() const {
     // note: when all tokens are output, we could skip this optimization to spare the ggml_get_rows() calls,
-    //       but this would make the graph topology depend on the number of output tokens, which can interere with
+    //       but this would make the graph topology depend on the number of output tokens, which can interfere with
     //       features that require constant topology such as pipeline parallelism
     //       ref: https://github.com/ggml-org/llama.cpp/pull/14275#issuecomment-2987424471
     //if (n_outputs < n_tokens) {

@@ -333,7 +333,7 @@ public:
     ggml_tensor * get_v(ggml_context * ctx, int32_t il) const;
 
     // store k_cur and v_cur in the cache based on the provided head location
-    // note: the heads in k_cur and v_cur should be layed out contiguously in memory
+    // note: the heads in k_cur and v_cur should be laid out contiguously in memory
     //   - k_cur  [n_embd_head_k, n_head_k, n_tokens]
     //   - k_idxs [n_tokens]
     //   - v_cur  [n_embd_head_v, n_head_v, n_tokens]

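Aside: the shape comments in this hunk follow the ggml convention that the first dimension varies fastest in memory. A rough sketch of the flat indexing such a contiguous k_cur buffer implies; the function and parameter names are assumptions for illustration, not the cache implementation:

    #include <cstddef>

    // flat offset into a contiguous buffer with conceptual shape [n_embd_head_k, n_head_k, n_tokens]:
    // the per-head embedding index varies fastest, then the head index, then the token index
    inline size_t k_cur_index(size_t i_embd, size_t i_head, size_t i_token,
                              size_t n_embd_head_k, size_t n_head_k) {
        return i_embd + n_embd_head_k * (i_head + n_head_k * i_token);
    }

Under this layout all heads of a given token form one contiguous block, consistent with the note about heads being laid out contiguously in memory.
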
@@ -9,7 +9,7 @@ llm_build_gemma_embedding::llm_build_gemma_embedding(const llama_model & model,
 
     inpL = build_inp_embd(model.tok_embd);
 
-    // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
+    // important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
     inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
     cb(inpL, "inp_scaled", -1);
 

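Aside: the ggml_scale call in this hunk (and in the two gemma3 hunks below) multiplies token embeddings by sqrt(n_embd) while leaving pre-encoded inputs such as image embeddings unchanged. A tiny sketch of that scale selection in isolation; gemma_input_scale is a hypothetical helper, not part of the graph code:

    #include <cmath>

    // token inputs get the Gemma embedding scale of sqrt(n_embd); raw embedding inputs pass through
    float gemma_input_scale(bool is_token_input, int n_embd) {
        return is_token_input ? std::sqrt((float) n_embd) : 1.0f;
    }

For example, with n_embd = 2048 the token-embedding scale would be about 45.25.
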
@@ -9,7 +9,7 @@ llm_build_gemma3<iswa>::llm_build_gemma3(const llama_model & model, const llm_gr
 
     inpL = build_inp_embd(model.tok_embd);
 
-    // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
+    // important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
     inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
     cb(inpL, "inp_scaled", -1);
 

@@ -12,7 +12,7 @@ llm_build_gemma3n_iswa::llm_build_gemma3n_iswa(const llama_model & model, const
 
     inpL = build_inp_embd(model.tok_embd);
 
-    // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
+    // important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
    inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
     cb(inpL, "inp_scaled", -1);
 