opencl: fix rms_norm_mul (#17250)

* opencl: use subgroup reduce for reduction in rms_norm_mul

* opencl: add comment about workgroup size
This commit is contained in:
lhez
2025-11-15 17:40:14 -08:00
committed by GitHub
parent 4db5641210
commit 52e5d421f1
2 changed files with 26 additions and 11 deletions
+1 -1
View File
@@ -5705,7 +5705,7 @@ static void ggml_opencl_op_rms_norm_fused(ggml_backend_t backend, ggml_tensor *
CL_CHECK(clSetKernelArg(kernel, 21, sizeof(cl_ulong), &nb2));
CL_CHECK(clSetKernelArg(kernel, 22, sizeof(cl_ulong), &nb3));
CL_CHECK(clSetKernelArg(kernel, 23, sizeof(float), &eps));
CL_CHECK(clSetKernelArg(kernel, 24, sizeof(float)*nth/sgs, NULL));
CL_CHECK(clSetKernelArg(kernel, 24, sizeof(float)*sgs, NULL));
backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
}