diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 4a8de5e..dd406df 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -4,12 +4,12 @@ on: push: branches: - main - - server_support + # - server_support pull_request: branches: - main - - server_support + # - server_support defaults: run: diff --git a/doc/SUMMARY.md b/doc/SUMMARY.md index 7645018..bf5579f 100644 --- a/doc/SUMMARY.md +++ b/doc/SUMMARY.md @@ -7,12 +7,11 @@ - [Injection Tutorial](en/injection_tutorial.md) # Server -- [Server](zh/api/server/README.md) - [Server](en/api/server/server.md) - - [Website](zh/api/server/website.md) - - [Tabby](zh/api/server/tabby.md) + - [Website](en/api/server/website.md) + - [Tabby](en/api/server/tabby.md) # For Developer -- [For Developer](en/injection_tutorial.md) +- [Makefile Usage](en/makefile_usage.md) # FAQ - [FAQ](en/FAQ.md) diff --git a/doc/en/V3-success.md b/doc/en/V3-success.md index af69f27..fed1664 100644 --- a/doc/en/V3-success.md +++ b/doc/en/V3-success.md @@ -6,5 +6,6 @@ ### Case 2 - Configuration: Dual Xeon 6430 32C processors, totaling 64 cores and 128 threads, 480GB DDR5 memory, single 4090 24G graphics card - Performance: Running speed approximately 6-8 tokens per second -# NOTE -If there are any other configurations that have been successfully run, please feel free to let us know. We will keep updating for everyone to refer to when reproducing. (It has been found that it also works on 2080, AMD, etc. (doge : ) https://docs.qq.com/smartsheet/form/AVxgQOYhhNfl%2FBB08J2%2Fv3rnnq?tab=BB08J2 \ No newline at end of file +## NOTE +If there are any other configurations that have been successfully run, please feel free to let us know. We will keep updating for everyone to refer to when reproducing. (It has been found that it also works on 2080, AMD, etc. (doge : ) +[click here](https://docs.qq.com/smartsheet/form/AVxgQOYhhNfl%2FBB08J2%2Fv3rnnq?tab=BB08J2) \ No newline at end of file diff --git a/doc/zh/api/server/README.md b/doc/zh/api/server/README.md deleted file mode 100644 index a0f47f4..0000000 --- a/doc/zh/api/server/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Server -Still Under Construction... (May have bugs and lack of documentation) \ No newline at end of file diff --git a/ktransformers/operators/experts.py b/ktransformers/operators/experts.py index ecfbca0..274a3ca 100644 --- a/ktransformers/operators/experts.py +++ b/ktransformers/operators/experts.py @@ -576,8 +576,6 @@ class KQwen2MoeSparseMoeBlock(BaseInjectedModule, Qwen2MoeSparseMoeBlock): routing_weights_expert = routing_weights.to(self.experts.device) if isinstance(self.experts, KExpertsBase) else routing_weights_expert.cpu() shared_expert_output = self.shared_expert(hidden_states) - tmp = self.shared_expert_gate(hidden_states) - print("shared_expert_gate shape ", tmp.shape) shared_expert_output = ( F.sigmoid(self.shared_expert_gate(hidden_states)) * shared_expert_output )