sleepy/llama.cpp

Fork 0

Files

T

History

Kaloyan Nikolov 222626cfdc

CI (3rd-party) / ubuntu-24-llguidance (push) Waiting to run

Details

CI (android) / android (push) Waiting to run

Details

CI (android) / android-ndk (push) Waiting to run

Details

CI (apple) / macOS-latest-ios (push) Waiting to run

Details

CI (apple) / macos-latest-ios-xcode (push) Waiting to run

Details

CI (apple) / macOS-latest-tvos (push) Waiting to run

Details

CI (apple) / macOS-latest-visionos (push) Waiting to run

Details

CI (apple) / macOS-latest-swift (generic/platform=iOS) (push) Blocked by required conditions

Details

CI (apple) / macOS-latest-swift (generic/platform=macOS) (push) Blocked by required conditions

Details

CI (apple) / macOS-latest-swift (generic/platform=tvOS) (push) Blocked by required conditions

Details

CI (cann) / openEuler-latest-cann (aarch64, Release, 310p, off) (push) Waiting to run

Details

CI (cann) / openEuler-latest-cann (aarch64, Release, 910b, off) (push) Waiting to run

Details

CI (cann) / openEuler-latest-cann (aarch64, Release, 910b, on) (push) Waiting to run

Details

CI (cann) / openEuler-latest-cann (x86, Release, 310p, off) (push) Waiting to run

Details

CI (cann) / openEuler-latest-cann (x86, Release, 910b, off) (push) Waiting to run

Details

CI (cann) / openEuler-latest-cann (x86, Release, 910b, on) (push) Waiting to run

Details

CI (riscv) / ubuntu-riscv64-native-sanitizer (Debug, ADDRESS) (push) Waiting to run

Details

CI (riscv) / ubuntu-riscv64-native-sanitizer (Debug, THREAD) (push) Waiting to run

Details

CI (riscv) / ubuntu-riscv64-native-sanitizer (Debug, UNDEFINED) (push) Waiting to run

Details

CI (sanitize) / ubuntu-latest-sanitizer (Debug, ADDRESS) (push) Waiting to run

Details

CI (sanitize) / ubuntu-latest-sanitizer (Debug, THREAD) (push) Waiting to run

Details

CI (sanitize) / ubuntu-latest-sanitizer (Debug, UNDEFINED) (push) Waiting to run

Details

CI (openvino) / ubuntu-24-openvino-GPU (push) Has been cancelled

Details

CI (self-hosted) / ggml-ci-nvidia-cuda (push) Waiting to run

Details

CI (self-hosted) / ggml-ci-nvidia-vulkan-cm (push) Waiting to run

Details

CI (self-hosted) / ggml-ci-nvidia-vulkan-cm2 (push) Waiting to run

Details

CI (self-hosted) / ggml-ci-mac-metal (push) Waiting to run

Details

CI (self-hosted) / ggml-ci-mac-webgpu (push) Waiting to run

Details

CI (self-hosted) / ggml-ci-mac-vulkan (push) Waiting to run

Details

CI (self-hosted) / ggml-ci-linux-intel-vulkan (push) Waiting to run

Details

CI (self-hosted) / ggml-ci-win-intel-vulkan (push) Waiting to run

Details

CI (sycl) / ubuntu-24-sycl (fp16, ON) (push) Waiting to run

Details

CI (sycl) / ubuntu-24-sycl (fp32, OFF) (push) Waiting to run

Details

CI (sycl) / windows-latest-sycl (push) Waiting to run

Details

CI (vulkan) / ubuntu-24-vulkan-llvmpipe (push) Waiting to run

Details

CI / build-cmake-pkg (push) Waiting to run

Details

CI / macOS-latest-arm64 (push) Waiting to run

Details

CI / macOS-latest-x64 (push) Waiting to run

Details

CI / macOS-latest-arm64-webgpu (push) Waiting to run

Details

CI / ubuntu-cpu (arm64, ubuntu-24.04-arm) (push) Waiting to run

Details

CI / ubuntu-cpu (ppc64le, ubuntu-24.04-ppc64le) (push) Waiting to run

Details

CI / ubuntu-cpu (s390x, ubuntu-24.04-s390x) (push) Waiting to run

Details

CI / ubuntu-cpu (x64, ubuntu-22.04) (push) Waiting to run

Details

CI / android-arm64 (push) Waiting to run

Details

CI / ubuntu-latest-rpc (push) Waiting to run

Details

CI / ubuntu-24-vulkan (arm64, ubuntu-24.04-arm) (push) Waiting to run

Details

CI / ubuntu-24-vulkan (x64, ubuntu-24.04) (push) Waiting to run

Details

CI / ubuntu-24-webgpu (push) Waiting to run

Details

CI / ubuntu-24-webgpu-wasm (push) Waiting to run

Details

CI / ubuntu-22-hip (push) Waiting to run

Details

CI / ubuntu-22-musa (push) Waiting to run

Details

CI / windows-latest (arm64, llvm-arm64, -G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON) (push) Waiting to run

Details

CI / windows-latest (arm64, llvm-arm64-opencl-adreno, -G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON) (push) Waiting to run

Details

CI / windows-latest (x64, cpu-x64 (static), -G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF) (push) Waiting to run

Details

CI / windows-latest (x64, openblas-x64, -G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DG… (push) Waiting to run

Details

CI / windows-latest (x64, vulkan-x64, -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON) (push) Waiting to run

Details

CI / ubuntu-latest-cuda (push) Waiting to run

Details

CI / windows-2022-cuda (12.4) (push) Waiting to run

Details

CI / windows-latest-hip (push) Waiting to run

Details

CI / ubuntu-cpu-riscv64-native (push) Waiting to run

Details

CI / ggml-ci-x64-cpu-low-perf (push) Waiting to run

Details

CI / ggml-ci-arm64-cpu-low-perf (push) Waiting to run

Details

CI / ggml-ci-x64-cpu-high-perf (push) Waiting to run

Details

CI / ggml-ci-arm64-cpu-high-perf (push) Waiting to run

Details

CI / ggml-ci-arm64-cpu-high-perf-sve (push) Waiting to run

Details

CI / ggml-ci-arm64-cpu-kleidiai (push) Waiting to run

Details

CI / ggml-ci-arm64-cpu-kleidiai-graviton4 (push) Waiting to run

Details

EditorConfig Checker / editorconfig (push) Waiting to run

Details

Release / macOS-cpu (arm64, arm64, -DGGML_METAL_USE_BF16=ON -DGGML_METAL_EMBED_LIBRARY=ON, macos-14) (push) Waiting to run

Details

Release / macOS-cpu (arm64, arm64-kleidiai, -DGGML_METAL_USE_BF16=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_CPU_KLEIDIAI=ON, macos-14) (push) Waiting to run

Details

Release / macOS-cpu (x64, x64, -DGGML_METAL=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3, macos-15-intel) (push) Waiting to run

Details

Release / ubuntu-cpu (arm64, ubuntu-24.04-arm) (push) Waiting to run

Details

Release / ubuntu-cpu (s390x, ubuntu-24.04-s390x) (push) Waiting to run

Details

Release / ubuntu-cpu (x64, ubuntu-22.04) (push) Waiting to run

Details

Release / ubuntu-vulkan (arm64, ubuntu-24.04-arm) (push) Waiting to run

Details

Release / ubuntu-vulkan (x64, ubuntu-22.04) (push) Waiting to run

Details

Release / android-arm64 (push) Waiting to run

Details

Release / ubuntu-24-openvino (push) Waiting to run

Details

Release / windows-cpu (arm64) (push) Waiting to run

Details

Release / windows-cpu (x64) (push) Waiting to run

Details

Release / windows (arm64, opencl-adreno, -G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON, ggml-opencl) (push) Waiting to run

Details

Release / windows (x64, vulkan, -DGGML_VULKAN=ON, ggml-vulkan) (push) Waiting to run

Details

Release / windows-cuda (12.4) (push) Waiting to run

Details

Release / windows-cuda (13.1) (push) Waiting to run

Details

Release / windows-sycl (push) Waiting to run

Details

Release / ubuntu-24-sycl (fp16, ON) (push) Waiting to run

Details

Release / ubuntu-24-sycl (fp32, OFF) (push) Waiting to run

Details

Release / ubuntu-22-rocm (7.2.1, x64, gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1150;gfx1200;gfx1201) (push) Waiting to run

Details

Release / windows-hip (gfx1150;gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032, radeon) (push) Waiting to run

Details

Release / ios-xcode-build (push) Waiting to run

Details

Release / openEuler-cann (aarch64, Release, 310p, off) (push) Waiting to run

Details

Release / openEuler-cann (aarch64, Release, 910b, on) (push) Waiting to run

Details

Release / openEuler-cann (x86, Release, 310p, off) (push) Waiting to run

Details

Release / openEuler-cann (x86, Release, 910b, on) (push) Waiting to run

Details

Release / release (push) Blocked by required conditions

Details

Server (sanitize) / server (RelWithDebInfo, ADDRESS) (push) Waiting to run

Details

Server (sanitize) / server (RelWithDebInfo, UNDEFINED) (push) Waiting to run

Details

Server (self-hosted) / server-metal (GPUx2, backend-sampling) (push) Waiting to run

Details

Server (self-hosted) / server-metal (GPUx2) (push) Waiting to run

Details

Server (self-hosted) / server-metal (GPUx1) (push) Waiting to run

Details

Server (self-hosted) / server-metal (GPUx1, backend-sampling) (push) Waiting to run

Details

Server / server (default) (push) Waiting to run

Details

Server / server (backend-sampling) (push) Waiting to run

Details

Server / server-windows (push) Waiting to run

Details

CI (openvino) / ubuntu-24-openvino-CPU (push) Has been cancelled

Details

CI (self-hosted) / ggml-ci-intel-openvino-gpu-low-perf (push) Has been cancelled

Details

[docs] add GIT.md with workflow and agent instructions

2026-04-30 18:11:44 +02:00

CMakeLists.txt

[docs] add GIT.md with workflow and agent instructions

2026-04-30 18:11:44 +02:00

eval-callback-profile.cpp

[docs] add GIT.md with workflow and agent instructions

2026-04-30 18:11:44 +02:00

eval-callback.cpp

common: fix missing exports in llama-common (#22340)

2026-04-27 08:06:39 +03:00

README.md

build: rename main → llama-cli, server → llama-server, llava-cli → llama-llava-cli, etc... (#7809)

2024-06-13 00:41:52 +01:00

README.md

llama.cpp/examples/eval-callback

A simple example that demonstrates how to use a callback during inference. It prints every operation and its tensor data to the console.

Usage:

llama-eval-callback \
  --hf-repo ggml-org/models \
  --hf-file phi-2/ggml-model-q4_0.gguf \
  --model phi-2-q4_0.gguf \
  --prompt hello \
  --seed 42 \
  -ngl 33

This will print output similar to the following:

llm_load_tensors: offloaded 33/33 layers to GPU
...
llama_new_context_with_model: n_ctx      = 512
...
llama_new_context_with_model:      CUDA0 compute buffer size =   105.00 MiB
llama_new_context_with_model:  CUDA_Host compute buffer size =     6.01 MiB
llama_new_context_with_model: graph nodes  = 1225
llama_new_context_with_model: graph splits = 2
ggml_debug:                 inp_embd = (f32)   GET_ROWS(token_embd.weight{2560, 51200, 1, 1}, inp_tokens{1, 1, 1, 1}}) = {2560, 1, 1, 1}
                                     [
                                      [
                                       [ -0.0181,   0.0272,   0.0272, ...],
                                      ],
                                     ]
ggml_debug:                   norm-0 = (f32)       NORM(CUDA0#inp_embd#0{2560, 1, 1, 1}, }) = {2560, 1, 1, 1}
                                     [
                                      [
                                       [ -0.6989,   1.0636,   1.0636, ...],
                                      ],
                                     ]
ggml_debug:                 norm_w-0 = (f32)        MUL(norm-0{2560, 1, 1, 1}, blk.0.attn_norm.weight{2560, 1, 1, 1}}) = {2560, 1, 1, 1}
                                     [
                                      [
                                       [ -0.1800,   0.2817,   0.2632, ...],
                                      ],
                                     ]
ggml_debug:              attn_norm-0 = (f32)        ADD(norm_w-0{2560, 1, 1, 1}, blk.0.attn_norm.bias{2560, 1, 1, 1}}) = {2560, 1, 1, 1}
                                     [
                                      [
                                       [ -0.1863,   0.2970,   0.2604, ...],
                                      ],
                                     ]
ggml_debug:                   wqkv-0 = (f32)    MUL_MAT(blk.0.attn_qkv.weight{2560, 7680, 1, 1}, attn_norm-0{2560, 1, 1, 1}}) = {7680, 1, 1, 1}
                                     [
                                      [
                                       [ -1.1238,   1.2876,  -1.8086, ...],
                                      ],
                                     ]
ggml_debug:                   bqkv-0 = (f32)        ADD(wqkv-0{7680, 1, 1, 1}, blk.0.attn_qkv.bias{7680, 1, 1, 1}}) = {7680, 1, 1, 1}
                                     [
                                      [
                                       [ -1.1135,   1.4604,  -1.9226, ...],
                                      ],
                                     ]
ggml_debug:            bqkv-0 (view) = (f32)       VIEW(bqkv-0{7680, 1, 1, 1}, }) = {2560, 1, 1, 1}
                                     [
                                      [
                                       [ -1.1135,   1.4604,  -1.9226, ...],
                                      ],
                                     ]
ggml_debug:                   Qcur-0 = (f32)       CONT(bqkv-0 (view){2560, 1, 1, 1}, }) = {2560, 1, 1, 1}
                                     [
                                      [
                                       [ -1.1135,   1.4604,  -1.9226, ...],
                                      ],
                                     ]
ggml_debug:        Qcur-0 (reshaped) = (f32)    RESHAPE(Qcur-0{2560, 1, 1, 1}, }) = {80, 32, 1, 1}
                                     [
                                      [
                                       [ -1.1135,   1.4604,  -1.9226, ...],
                                       [ -0.3608,   0.5076,  -1.8866, ...],
                                       [  1.7643,   0.0273,  -2.1065, ...],
                                       ...
                                      ],
                                     ]
ggml_debug:                   Qcur-0 = (f32)       ROPE(Qcur-0 (reshaped){80, 32, 1, 1}, CUDA0#inp_pos#0{1, 1, 1, 1}}) = {80, 32, 1, 1}
                                     [
                                      [
                                       [ -1.1135,   1.4604,  -1.9226, ...],
                                       [ -0.3608,   0.5076,  -1.8866, ...],
                                       [  1.7643,   0.0273,  -2.1065, ...],
                                       ...
                                      ],
                                     ]