spec : refactor params (#22397)
* spec : refactor params
* cont : fix
* cont : rename "sparam" to "sampling"
* cont : add spec params category
* cont : add info about removed arguments
* cont : skip param length check for spec params
* cont : adapt server tests
This commit is contained in:
@@ -83,15 +83,14 @@ class ServerProcess:
     kv_unified: bool | None = False
     server_slots: bool | None = False
     pooling: str | None = None
-    draft: int | None = None
     api_key: str | None = None
     models_dir: str | None = None
     models_max: int | None = None
     no_models_autoload: bool | None = None
     lora_files: List[str] | None = None
     enable_ctx_shift: int | None = False
-    draft_min: int | None = None
-    draft_max: int | None = None
+    spec_draft_n_min: int | None = None
+    spec_draft_n_max: int | None = None
     no_webui: bool | None = None
     jinja: bool | None = None
     reasoning_format: Literal['deepseek', 'none', 'nothink'] | None = None
@@ -165,8 +164,6 @@ class ServerProcess:
             server_args.extend(["--threads", self.n_threads])
         if self.n_gpu_layer:
             server_args.extend(["--n-gpu-layers", self.n_gpu_layer])
-        if self.draft is not None:
-            server_args.extend(["--draft", self.draft])
         if self.server_continuous_batching:
             server_args.append("--cont-batching")
         if self.server_embeddings:
@@ -214,10 +211,10 @@ class ServerProcess:
             server_args.append("--context-shift")
         if self.api_key:
             server_args.extend(["--api-key", self.api_key])
-        if self.draft_max:
-            server_args.extend(["--draft-max", self.draft_max])
-        if self.draft_min:
-            server_args.extend(["--draft-min", self.draft_min])
+        if self.spec_draft_n_max:
+            server_args.extend(["--spec-draft-n-max", self.spec_draft_n_max])
+        if self.spec_draft_n_min:
+            server_args.extend(["--spec-draft-n-min", self.spec_draft_n_min])
         if self.no_webui:
             server_args.append("--no-webui")
         if self.no_models_autoload:
Reference in New Issue
Block a user