Merge pull request #151 from kvcache-ai/update-yaml

[update] Update marlin expert yaml example to fully use gpu.
This commit is contained in:
Azure 2025-02-12 12:14:37 +08:00 committed by GitHub
commit f30c6482a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -79,6 +79,24 @@
generate_device: "cuda:1"
prefill_device: "cuda:1"
# Rule: modules named model.layers.<N>.mlp.experts with N in 0..4 are replaced
# by KTransformersExperts, generating on cuda:0 with the KExpertsMarlin op.
- match:
name: "^model\\.layers\\.(0|[1-4])\\.mlp\\.experts$" # matches the experts module of layers 0 through 4 (injected as marlin experts)
replace:
class: ktransformers.operators.experts.KTransformersExperts
kwargs:
generate_device: "cuda:0" # generation for these experts runs on cuda:0
generate_op: "KExpertsMarlin"
# NOTE(review): presumably keeps the injector from descending into the
# replaced module's children - confirm against the rule loader.
recursive: False
# Rule: the module named model.layers.30.mlp.experts is replaced by
# KTransformersExperts, generating on cuda:1 with the KExpertsMarlin op.
- match:
name: "^model\\.layers\\.([3][0])\\.mlp\\.experts$" # inject experts in layer 30 as marlin expert; NOTE(review): this pattern matches layer 30 only - use ([3][0-1]) if layer 31 is also intended
replace:
class: ktransformers.operators.experts.KTransformersExperts
kwargs:
generate_device: "cuda:1"
generate_op: "KExpertsMarlin"
# NOTE(review): presumably keeps the injector from descending into the
# replaced module's children - confirm against the rule loader.
recursive: False
- match:
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$"
replace:
@@ -139,5 +157,5 @@
replace:
class: "default"
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"
generate_device: "cuda:0"
prefill_device: "cuda:0"