model : add LFM2-ColBert-350M (#18607)
* model : add LFM2-ColBert-350M

* llama_model_n_embd_out() - returns `hparams.n_embd_out` if set, falling back to `hparams.n_embd`
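The second bullet describes a simple fallback: a model may declare a distinct output embedding size, and everything else keeps using the hidden size. A minimal sketch of that rule in Python, for illustration only; the real `llama_model_n_embd_out()` lives in the C API, and the `HParams` class here is a hypothetical stand-in:

```python
from dataclasses import dataclass

@dataclass
class HParams:
    n_embd: int          # hidden size of the model
    n_embd_out: int = 0  # output embedding size; 0 means "not set"

def n_embd_out(hp: HParams) -> int:
    # Return n_embd_out when the model sets it, otherwise fall back to n_embd.
    return hp.n_embd_out if hp.n_embd_out > 0 else hp.n_embd
```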
gguf-py/gguf/constants.py
@@ -104,6 +104,7 @@ class Keys:
         VOCAB_SIZE = "{arch}.vocab_size"
         CONTEXT_LENGTH = "{arch}.context_length"
         EMBEDDING_LENGTH = "{arch}.embedding_length"
+        EMBEDDING_LENGTH_OUT = "{arch}.embedding_length_out"
         FEATURES_LENGTH = "{arch}.features_length"
         BLOCK_COUNT = "{arch}.block_count"
         LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
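The hunk above adds the `{arch}.embedding_length_out` metadata key. It can be read back with the `gguf` Python package; a minimal sketch, assuming a converted LFM2-ColBert file at the hypothetical path `model.gguf` (LFM2's arch string is `lfm2`):

```python
from gguf import GGUFReader

reader = GGUFReader("model.gguf")  # hypothetical path
field = reader.get_field("lfm2.embedding_length_out")
if field is not None:
    # A scalar field stores its value in the part indexed by field.data[0].
    print(int(field.parts[field.data[0]][0]))
```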
@@ -3038,6 +3039,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.DENSE_2_OUT, # LFM2-ColBert-350M
     ],
     MODEL_ARCH.LFM2MOE: [
         MODEL_TENSOR.TOKEN_EMBD,
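`MODEL_TENSORS` is the table the conversion script consults to decide which tensors an architecture may emit; after this change the LFM2 list includes the ColBERT output projection. A quick sanity check, using only names exported by `gguf`:

```python
from gguf import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS

# DENSE_2_OUT is now part of the LFM2 tensor set.
assert MODEL_TENSOR.DENSE_2_OUT in MODEL_TENSORS[MODEL_ARCH.LFM2]
```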
gguf-py/gguf/gguf_writer.py
@@ -681,6 +681,9 @@ class GGUFWriter:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
+    def add_embedding_length_out(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.EMBEDDING_LENGTH_OUT.format(arch=self.arch), length)
+
     def add_features_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
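The new writer method mirrors its neighbours; usage during conversion looks like the following sketch, where the output path and the two sizes are illustrative values rather than LFM2-ColBert-350M's real hyperparameters:

```python
from gguf import GGUFWriter

writer = GGUFWriter("model.gguf", arch="lfm2")  # hypothetical output path
writer.add_embedding_length(1024)      # example hidden size
writer.add_embedding_length_out(128)   # example output (ColBERT) dimension
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
```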