b4e335d8dc
This change combines the rms_norm+mul and rope+view+set_rows fusions to allow fusing the whole sequence together. This comes up in Qwen3, Bailing, and some other models.
12 lines
264 B
Plaintext
12 lines
264 B
Plaintext
#version 450
|
|
|
|
#include "rope_head.glsl"
|
|
#include "rope_funcs.glsl"
|
|
|
|
// Shader entry point: derives the two indices for this invocation from its
// global invocation ID and hands them, together with the push constants `pc`,
// to rope_neox().
void main() {
    // Snapshot the built-in once and read both components from the copy.
    const uvec3 gid = gl_GlobalInvocationID;

    // The x component is the row index. As the original note puts it, this is
    // actually i2*nb2+i1, but the rows are contiguous so it can be used directly.
    const uint row = gid.x;

    // The y component is doubled to form the first index given to rope_neox().
    const uint i0_base = 2u * gid.y;

    rope_neox(i0_base, row, pc);
}
|