mirror of https://github.com/RYDE-WORK/ktransformers.git
synced 2026-02-07 15:13:30 +08:00

update yaml example; update version idx; update docker file

parent 6735beb5b6
commit 8747c099f2
@@ -25,7 +25,7 @@ rm -rf /var/lib/apt/lists/* &&
 cd ktransformers &&
 git submodule init &&
 git submodule update &&
-pip install ninja pyproject numpy &&
+pip install ninja pyproject numpy cpufeature &&
 pip install flash-attn &&
 CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose &&
 pip cache purge
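
The only change in this build step is the extra cpufeature dependency on the pip install line. A minimal sketch of how such a dependency is typically consulted at build time (assuming the package exposes a CPUFeature mapping of detected instruction sets; neither that mapping nor the "AVX2" key appears in this diff):

# Illustrative only: ask cpufeature whether the host CPU reports AVX2.
# The CPUFeature mapping and the "AVX2" key are assumptions about the package,
# not something shown in this commit.
import cpufeature

if cpufeature.CPUFeature.get("AVX2", False):
    print("AVX2 detected; a native AVX2 kernel build could be selected")
else:
    print("AVX2 not detected")
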
@@ -5,7 +5,7 @@ Description :
 Author : kkk1nak0
 Date : 2024-08-15 07:34:46
 Version : 1.0.0
-LastEditors : chenxl
-LastEditTime : 2024-08-28 15:19:03
+LastEditors : Azure-Tang
+LastEditTime : 2024-08-29 22:35:51
 '''
-__version__ = "0.1.3"
+__version__ = "0.1.4"
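
After this bump, a quick way to confirm which build is installed (a minimal sketch, assuming the version string is re-exported as ktransformers.__version__ at the package top level):

# Hedged sketch: print the installed package version.
import ktransformers

print(ktransformers.__version__)  # expected to read "0.1.4" after this commit
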
@@ -7,7 +7,7 @@
       prefill_device: "cpu"
 
 - match:
-    name: "^model\\.layers\\.([0-9])\\."
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -15,7 +15,7 @@
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\."
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -23,7 +23,7 @@
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\."
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -31,7 +31,7 @@
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\."
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
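
These four rules rebalance the rotary-embedding layers across the four GPUs: the old patterns split layers 0-9, 10-19, 20-29, and 30-59, while the new ones split them evenly into 0-14, 15-29, 30-44, and 45-59. A small sanity check that the new patterns claim each layer exactly once (a sketch; the total of 60 layers is inferred from the old [345][0-9] pattern):

import re

# New per-GPU layer patterns from this diff.
patterns = [
    r"^model\.layers\.([0-9]|[1][0-4])\.",     # cuda:0
    r"^model\.layers\.([2][0-9]|[1][5-9])\.",  # cuda:1
    r"^model\.layers\.([3][0-9]|[4][0-4])\.",  # cuda:2
    r"^model\.layers\.([5][0-9]|[4][5-9])\.",  # cuda:3
]

for layer in range(60):
    name = f"model.layers.{layer}."
    hits = [i for i, p in enumerate(patterns) if re.match(p, name)]
    assert len(hits) == 1, f"layer {layer} matched devices {hits}"

print("layers 0-59 are each claimed by exactly one device")
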
@@ -40,7 +40,7 @@
       prefill_device: "cuda:3"
 
 - match:
-    name: "^model\\.layers\\.([0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -50,7 +50,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -60,7 +60,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -70,7 +70,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
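
Besides the shifted layer ranges, these linear rules narrow the negative lookahead from (?!self_attn) to (?!self_attn\\.kv_b_proj): previously every module under self_attn was skipped, while now only self_attn.kv_b_proj is left untouched and the other attention projections fall back under the KTransformersLinear replacement. A small illustration (the projection names below are examples, not taken from this diff):

import re

old = r"^model\.layers\.([0-9])\.(?!self_attn).*$"
new = r"^model\.layers\.([0-9]|[1][0-4])\.(?!self_attn\.kv_b_proj).*$"

# Example module names for layer 0; real names come from the model definition.
names = [
    "model.layers.0.mlp.gate_proj",
    "model.layers.0.self_attn.q_proj",
    "model.layers.0.self_attn.kv_b_proj",
]
for n in names:
    print(f"{n}: old={bool(re.match(old, n))}, new={bool(re.match(new, n))}")
# Old pattern: everything under self_attn is excluded, so only the mlp name matches.
# New pattern: q_proj now matches as well; only kv_b_proj stays excluded.
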
@@ -81,7 +81,7 @@
       prefill_op: "KLinearTorch"
 
 - match:
-    name: "^model\\.layers\\.([0-9])\\.mlp$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function
@@ -89,7 +89,7 @@
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function
@@ -97,7 +97,7 @@
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function
@@ -105,7 +105,7 @@
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function
@@ -114,7 +114,7 @@
       prefill_device: "cuda:3"
 
 - match:
-    name: "^model\\.layers\\.([0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert paralleism
     kwargs:
@@ -125,7 +125,7 @@
       out_device: "cuda:0"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert paralleism
     kwargs:
@@ -136,7 +136,7 @@
       out_device: "cuda:1"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert paralleism
     kwargs:
@@ -147,7 +147,7 @@
       out_device: "cuda:2"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert paralleism
     kwargs:
@@ -159,28 +159,28 @@
   recursive: False # don't recursively inject submodules of this module
 
 - match:
-    name: "^model\\.layers\\.([0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention # optimized MLA implementation
     kwargs:
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention # optimized MLA implementation
     kwargs:
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention # optimized MLA implementation
     kwargs:
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention # optimized MLA implementation
     kwargs:
@@ -194,35 +194,35 @@
     kwargs:
       per_layer_prefill_intput_threshold: 0 # 0 is close layer wise prefill
       transfer_map:
-        10: "cuda:1"
-        20: "cuda:2"
-        30: "cuda:3"
+        15: "cuda:1"
+        30: "cuda:2"
+        45: "cuda:3"
 
 - match:
-    name: "^model\\.layers\\.([0-9])\\."
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\."
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "(^model\\.layers\\.([1][0-9])\\.)"
+    name: "(^model\\.layers\\.([2][0-9]|[1][5-9])\\.)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "(^model\\.layers\\.([2][0-9])\\.)"
+    name: "(^model\\.layers\\.([3][0-9]|[4][0-4])\\.)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "(^model\\.layers\\.([345][0-9])\\.)|(^model.norm)|(^lm_head)"
+    name: "(^model\\.layers\\.([5][0-9]|[4][5-9])\\.)|(^model.norm)|(^lm_head)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:3"
       prefill_device: "cuda:3"
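
The transfer_map boundaries move from 10/20/30 to 15/30/45 so the device hand-off points line up with the new 15-layer-per-GPU split used above. Reading each entry as "from this layer index onward, run on the mapped device" (an assumed interpretation, not stated in the diff), the new map implies:

# Sketch of the layer-to-device assignment implied by the updated transfer_map,
# assuming layers before the first boundary stay on cuda:0 and each boundary
# moves all later layers to its mapped device.
transfer_map = {15: "cuda:1", 30: "cuda:2", 45: "cuda:3"}

def device_for_layer(idx: int, default: str = "cuda:0") -> str:
    device = default
    for boundary in sorted(transfer_map):
        if idx >= boundary:
            device = transfer_map[boundary]
    return device

assert device_for_layer(0) == "cuda:0"
assert device_for_layer(14) == "cuda:0"
assert device_for_layer(15) == "cuda:1"
assert device_for_layer(30) == "cuda:2"
assert device_for_layer(59) == "cuda:3"
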
@@ -24,7 +24,7 @@
       prefill_device: "cuda:1"
 
 - match:
-    name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -35,7 +35,7 @@
       prefill_op: "KLinearTorch"
 
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types