mirror of
https://github.com/RYDE-WORK/llama.cpp.git
synced 2026-02-07 00:03:16 +08:00
* CUDA: use mma PTX instructions for FlashAttention
* __shfl_sync workaround for movmatrix
* add __shfl_sync to HIP
Co-authored-by: Diego Devesa <slarengh@gmail.com>
11 lines
318 B
Plaintext
11 lines
318 B
Plaintext
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
|
|
|
#include "../fattn-mma-f16.cuh"
|
|
|
|
// Invokes DECL_FATTN_MMA_F16_CASE once for each first-argument value
// (64, 80, 96, 112, 128, 256), always with 16 as the second argument.
// The macro is not defined in this file; presumably it is provided by the
// "../fattn-mma-f16.cuh" include above -- TODO confirm.
// NOTE(review): the first argument looks like an attention head size and the
// second like a per-block column count -- confirm against the macro definition.
// This file is marked as autogenerated by generate_cu_files.py, so any lasting
// change to this list belongs in that generator rather than here.
DECL_FATTN_MMA_F16_CASE(64, 16);
|
|
DECL_FATTN_MMA_F16_CASE(80, 16);
|
|
DECL_FATTN_MMA_F16_CASE(96, 16);
|
|
DECL_FATTN_MMA_F16_CASE(112, 16);
|
|
DECL_FATTN_MMA_F16_CASE(128, 16);
|
|
DECL_FATTN_MMA_F16_CASE(256, 16);
|