Files
llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp
T
Kevin Pouget ffaafde16f ggml-virtgpu: improve the reliability of the code (#19846)
* ggml-virtgpu-backend: validate the consistency of the received objects

This patch adds consistency checks in the
ggml-virtgpu-backend (running on the host side) to ensure that the
data received from the guest is consistent (valid pointers, valid
sizes and offsets).

* ggml-virtgpu-backend: add fallback/skips for optional ggml backend methods

```
  1. bck->iface.synchronize(bck)
  2. buft->iface.get_alloc_size(buft, op)
  3. buft->iface.get_max_size(buft)
```

These three methods are optional in the GGML interface. `get_max_size`
was already properly defaulted, but `backend synchronize` and `buft
get_alloc_size` would have segfaulted the backend if not implemented.

* ggml-virtgpu-backend: fix log format missing argument

* ggml-virtgpu-backend: improve the abort message

* ggml-virtgpu-backend: more safety checks

* ggml-virtgpu-backend: new error code

* ggml-virtgpu-backend: initialize all the error codes

* ggml-virtgpu: add a missing comment generated by the code generator

* ggml-virtgpu: add the '[virtgpu]' prefix to the device/buffer names

* ggml-virtgpu: apir_device_buffer_from_ptr: improve the error message

* ggml-virtgpu: shared: make it match the latest api_remoting.h of Virglrenderer APIR

(still unmerged)

* ggml-virtgpu: update the code generator to have dispatch_command_name in a host/guest shared file

* ggml-virtgpu: REMOTE_CALL: fail if the backend returns an error

* docs/backend/VirtGPU.md: indicate that the RAM+VRAM size is limited to 64 GB with libkrun

* ggml-virtgpu: turn off clang-format header ordering for some of the files

Compilation breaks when ordered alphabetically.

* ggml-virtgpu: clang-format

* ggml-virtgpu/backend/shared/api_remoting: better comments for the APIR return codes
2026-02-26 20:00:57 +08:00

193 lines
5.5 KiB
C++

#include "virtgpu-forward-impl.h"
#include "virtgpu-shm.h"
// Queries the host for its device count by forwarding an
// APIR_COMMAND_TYPE_DEVICE_GET_COUNT command.
//
// Returns the decoded int32 value. The result variable starts at -1, so that
// is what the caller receives if the decoder leaves it untouched.
int apir_device_get_count(virtgpu * gpu) {
    apir_encoder *        enc;
    apir_decoder *        dec;
    ApirForwardReturnCode rc;

    REMOTE_CALL_PREPARE(gpu, enc, APIR_COMMAND_TYPE_DEVICE_GET_COUNT);
    REMOTE_CALL(gpu, enc, dec, rc);

    int32_t count = -1;
    apir_decode_int32_t(dec, &count);

    remote_call_finish(gpu, enc, dec);

    return count;
}
// Fetches the device name from the host by forwarding an
// APIR_COMMAND_TYPE_DEVICE_GET_NAME command.
//
// Returns a buffer obtained from apir_decoder_alloc_array() and filled by
// apir_decode_char_array(), or NULL when that allocation fails.
// NOTE(review): who releases the returned buffer is not visible in this file;
// confirm with the callers.
char * apir_device_get_name(virtgpu * gpu) {
    apir_encoder *        encoder;
    apir_decoder *        decoder;
    ApirForwardReturnCode ret;

    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_NAME);
    REMOTE_CALL(gpu, encoder, decoder, ret);

    const size_t string_size = apir_decode_array_size_unchecked(decoder);
    char *       string      = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
    if (string) {
        apir_decode_char_array(decoder, string, string_size);
    } else {
        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
    }

    // Close the remote call on every path, including the allocation failure,
    // so that each prepared call is matched by a finish.
    remote_call_finish(gpu, encoder, decoder);

    return string;
}
// Fetches the device description from the host by forwarding an
// APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION command.
//
// Returns a buffer obtained from apir_decoder_alloc_array() and filled by
// apir_decode_char_array(), or NULL when that allocation fails.
// NOTE(review): who releases the returned buffer is not visible in this file;
// confirm with the callers.
char * apir_device_get_description(virtgpu * gpu) {
    apir_encoder *        encoder;
    apir_decoder *        decoder;
    ApirForwardReturnCode ret;

    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION);
    REMOTE_CALL(gpu, encoder, decoder, ret);

    const size_t string_size = apir_decode_array_size_unchecked(decoder);
    char *       string      = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
    if (string) {
        apir_decode_char_array(decoder, string, string_size);
    } else {
        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device description buffer\n", __func__);
    }

    // Close the remote call on every path, including the allocation failure,
    // so that each prepared call is matched by a finish.
    remote_call_finish(gpu, encoder, decoder);

    return string;
}
// Returns the device type reported by the host.
//
// The value lives in a function-local static. While that static still holds
// 255 the host is queried (APIR_COMMAND_TYPE_DEVICE_GET_TYPE); once it holds
// any other value, that value is returned without issuing a remote call.
// The static is not keyed on `gpu`, so it is shared by every caller.
uint32_t apir_device_get_type(virtgpu * gpu) {
    static uint32_t cached_type = 255;  // 255 doubles as the "not fetched yet" marker

    if (cached_type == 255) {
        apir_encoder *        enc;
        apir_decoder *        dec;
        ApirForwardReturnCode rc;

        REMOTE_CALL_PREPARE(gpu, enc, APIR_COMMAND_TYPE_DEVICE_GET_TYPE);
        REMOTE_CALL(gpu, enc, dec, rc);

        apir_decode_uint32_t(dec, &cached_type);

        remote_call_finish(gpu, enc, dec);
    }

    return cached_type;
}
// Queries the host for the device memory figures by forwarding an
// APIR_COMMAND_TYPE_DEVICE_GET_MEMORY command.
//
// Two size_t values are decoded, in order, and copied to *free and *total.
// The decode targets are function-local statics, so they keep their last
// value between calls.
// NOTE(review): the remote call is issued on every invocation, so the statics
// do not act as a cache; confirm whether static storage is intentional.
void apir_device_get_memory(virtgpu * gpu, size_t * free, size_t * total) {
    static size_t last_free  = 0;
    static size_t last_total = 0;

    apir_encoder *        enc;
    apir_decoder *        dec;
    ApirForwardReturnCode rc;

    REMOTE_CALL_PREPARE(gpu, enc, APIR_COMMAND_TYPE_DEVICE_GET_MEMORY);
    REMOTE_CALL(gpu, enc, dec, rc);

    // The reply carries the free amount first, then the total.
    apir_decode_size_t(dec, &last_free);
    apir_decode_size_t(dec, &last_total);

    *free  = last_free;
    *total = last_total;

    remote_call_finish(gpu, enc, dec);
}
// Asks the host whether the device supports the operation described by `op`.
//
// The tensor is serialized inline into the request
// (APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP) and a single bool is decoded from
// the reply. Returns that bool; the result defaults to false so that a reply
// the decoder does not write into is reported as "not supported" rather than
// returning an indeterminate value.
bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op) {
    apir_encoder *        encoder;
    apir_decoder *        decoder;
    ApirForwardReturnCode ret;

    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP);

    apir_encode_ggml_tensor_inline(encoder, op);

    REMOTE_CALL(gpu, encoder, decoder, ret);

    bool supports_op = false;
    apir_decode_bool_t(decoder, &supports_op);

    remote_call_finish(gpu, encoder, decoder);

    return supports_op;
}
// Retrieves the host-side buffer-type handle of the device by forwarding an
// APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE command.
//
// Returns the handle decoded from the reply.
apir_buffer_type_host_handle_t apir_device_get_buffer_type(virtgpu * gpu) {
    apir_encoder *        enc;
    apir_decoder *        dec;
    ApirForwardReturnCode rc;

    REMOTE_CALL_PREPARE(gpu, enc, APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE);
    REMOTE_CALL(gpu, enc, dec, rc);

    apir_buffer_type_host_handle_t handle;
    apir_decode_apir_buffer_type_host_handle_t(dec, &handle);

    remote_call_finish(gpu, enc, dec);

    return handle;
}
// Retrieves the device property flags from the host by forwarding an
// APIR_COMMAND_TYPE_DEVICE_GET_PROPS command.
//
// Four bools are decoded from the reply, in this order, straight into the
// caller-provided locations: async, host_buffer, buffer_from_host_ptr, events.
// The pointers are passed to the decoder as-is (no NULL check is done here).
void apir_device_get_props(virtgpu * gpu,
                           bool *    async,
                           bool *    host_buffer,
                           bool *    buffer_from_host_ptr,
                           bool *    events) {
    apir_encoder *        enc;
    apir_decoder *        dec;
    ApirForwardReturnCode rc;

    REMOTE_CALL_PREPARE(gpu, enc, APIR_COMMAND_TYPE_DEVICE_GET_PROPS);
    REMOTE_CALL(gpu, enc, dec, rc);

    // Decode order must match the order the reply was written in.
    apir_decode_bool_t(dec, async);
    apir_decode_bool_t(dec, host_buffer);
    apir_decode_bool_t(dec, buffer_from_host_ptr);
    apir_decode_bool_t(dec, events);

    remote_call_finish(gpu, enc, dec);
}
// Creates a guest-host shared memory region of `size` bytes and asks the host
// (APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR) to build a buffer on top of it.
//
// The request carries the shared-memory resource id, `size` and
// `max_tensor_size`. The reply provides the buffer host handle and the
// buffer-type host handle, both stored in the returned context together with
// the shared-memory descriptor.
//
// Aborts (GGML_ABORT) when virtgpu_shmem_create() reports a failure.
apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, size_t max_tensor_size) {
    apir_encoder *        encoder;
    apir_decoder *        decoder;
    ApirForwardReturnCode ret;

    apir_buffer_context_t buffer_context;

    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR);

    if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) {
        // `size` is a size_t: %zu is the matching conversion (%ld is wrong
        // wherever long and size_t differ in width or signedness).
        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate %zub of guest-host shared buffer", __func__, size);
    }

    apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id);

    apir_encode_size_t(encoder, &size);
    apir_encode_size_t(encoder, &max_tensor_size);

    REMOTE_CALL(gpu, encoder, decoder, ret);

    apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle);
    buffer_context.buft_host_handle = apir_decode_apir_buffer_type_host_handle(decoder);

    remote_call_finish(gpu, encoder, decoder);

    return buffer_context;
}