mirror of
https://github.com/RYDE-WORK/llama.cpp.git
synced 2026-02-01 21:23:14 +08:00
CUDA: fix shared memory access condition for mmv (#10740)
This commit is contained in:
parent
c37fb4cf62
commit
26a8406ba9
@@ -57,7 +57,7 @@ static __global__ void mul_mat_vec(
|
|||||||
if (block_size > WARP_SIZE) {
|
if (block_size > WARP_SIZE) {
|
||||||
buf_iw[tid/WARP_SIZE] = sumf;
|
buf_iw[tid/WARP_SIZE] = sumf;
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
-if (tid > WARP_SIZE) {
|
+if (tid >= WARP_SIZE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
sumf = buf_iw[tid];
|
sumf = buf_iw[tid];
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user