diff --git a/ktransformers/ktransformers_ext/CMakeLists.txt b/ktransformers/ktransformers_ext/CMakeLists.txt index ecce9b7..22623a5 100644 --- a/ktransformers/ktransformers_ext/CMakeLists.txt +++ b/ktransformers/ktransformers_ext/CMakeLists.txt @@ -209,6 +209,7 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/llama.cpp ${CMAKE include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../third_party) if (WIN32) include_directories("$ENV{CUDA_PATH}/include") + add_compile_definitions(KTRANSFORMERS_USE_CUDA=1) elseif (UNIX) if (KTRANSFORMERS_USE_CUDA) find_package(CUDA REQUIRED) diff --git a/ktransformers/ktransformers_ext/operators/kvcache/kvcache_attn.cpp b/ktransformers/ktransformers_ext/operators/kvcache/kvcache_attn.cpp index c59cb94..4190c03 100644 --- a/ktransformers/ktransformers_ext/operators/kvcache/kvcache_attn.cpp +++ b/ktransformers/ktransformers_ext/operators/kvcache/kvcache_attn.cpp @@ -10,6 +10,8 @@ #include "kvcache.h" +#include + void KVCache::attention_kvhead_(const uint16_t *q_in_data, ggml_fp16_t *output, float *attn_lse, int batch_size, Backend *backend) { diff --git a/ktransformers/ktransformers_ext/operators/kvcache/kvcache_load_dump.cpp b/ktransformers/ktransformers_ext/operators/kvcache/kvcache_load_dump.cpp index eadf90f..4de217f 100644 --- a/ktransformers/ktransformers_ext/operators/kvcache/kvcache_load_dump.cpp +++ b/ktransformers/ktransformers_ext/operators/kvcache/kvcache_load_dump.cpp @@ -9,6 +9,9 @@ **/ #include "kvcache.h" + +#include + void KVCache::load_kvcache(std::string tensor_file_path, Backend *backend) { // Timer start auto start = std::chrono::high_resolution_clock::now(); diff --git a/ktransformers/ktransformers_ext/operators/kvcache/kvcache_read_write.cpp b/ktransformers/ktransformers_ext/operators/kvcache/kvcache_read_write.cpp index 998f1b0..0104905 100644 --- a/ktransformers/ktransformers_ext/operators/kvcache/kvcache_read_write.cpp +++ b/ktransformers/ktransformers_ext/operators/kvcache/kvcache_read_write.cpp @@ -10,6 +10,8 @@ #include "kvcache.h" +#include + void KVCache::get_anchor_one_block(ggml_fp16_t *anchor, int layer_id, int block_idx, Backend *backend) { // Timer start diff --git a/ktransformers/ktransformers_ext/operators/kvcache/kvcache_utils.cpp b/ktransformers/ktransformers_ext/operators/kvcache/kvcache_utils.cpp index f1d6f7d..c57d475 100644 --- a/ktransformers/ktransformers_ext/operators/kvcache/kvcache_utils.cpp +++ b/ktransformers/ktransformers_ext/operators/kvcache/kvcache_utils.cpp @@ -10,6 +10,8 @@ #include "kvcache.h" +#include + std::string ggml_type_to_string(ggml_type type) { switch (type) { case GGML_TYPE_F32: