From eb38b903175b94ad8490a6f3e04469f5cf1ad73b Mon Sep 17 00:00:00 2001 From: "Y.W. Fang" <1157670798@qq.com> Date: Thu, 11 Apr 2024 13:40:56 +0800 Subject: [PATCH] unify mbpp evaluation set of moe --- README-en.md | 15 ++++++++------- README.md | 14 +++++++------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/README-en.md b/README-en.md index 119aa63..065ea40 100644 --- a/README-en.md +++ b/README-en.md @@ -303,6 +303,7 @@ print(model.response("<用户>Write an acrostic poem with the word MINICPM (One | MiniCPM-2.4B-128k | 27.32 | 37.68 | 98.31 | 99.83 | 9 | 29.69 | 23.06 | 16.33 | 15.73 | 9.5 | 0 | 4.29 | 22.08 | 0 | #### MiniCPM-MoE-8x2B Evaluation +#### MiniCPM-MoE-8x2B模型评测
@@ -314,7 +315,7 @@ print(model.response("<用户>Write an acrostic poem with the word MINICPM (One - + @@ -327,7 +328,7 @@ print(model.response("<用户>Write an acrostic poem with the word MINICPM (One - + @@ -349,7 +350,7 @@ print(model.response("<用户>Write an acrostic poem with the word MINICPM (One - + @@ -360,7 +361,7 @@ print(model.response("<用户>Write an acrostic poem with the word MINICPM (One - + @@ -371,7 +372,7 @@ print(model.response("<用户>Write an acrostic poem with the word MINICPM (One - + @@ -382,7 +383,7 @@ print(model.response("<用户>Write an acrostic poem with the word MINICPM (One - + @@ -393,7 +394,7 @@ print(model.response("<用户>Write an acrostic poem with the word MINICPM (One - + diff --git a/README.md b/README.md index b873b24..8c19215 100644 --- a/README.md +++ b/README.md @@ -343,7 +343,7 @@ print(model.response("<用户>山东省最高的山是哪座山, 它比黄山高 - + @@ -356,7 +356,7 @@ print(model.response("<用户>山东省最高的山是哪座山, 它比黄山高 - + @@ -378,7 +378,7 @@ print(model.response("<用户>山东省最高的山是哪座山, 它比黄山高 - + @@ -389,7 +389,7 @@ print(model.response("<用户>山东省最高的山是哪座山, 它比黄山高 - + @@ -400,7 +400,7 @@ print(model.response("<用户>山东省最高的山是哪座山, 它比黄山高 - + @@ -411,7 +411,7 @@ print(model.response("<用户>山东省最高的山是哪座山, 它比黄山高 - + @@ -422,7 +422,7 @@ print(model.response("<用户>山东省最高的山是哪座山, 它比黄山高 - +
CEval CMMLU HumanEvalMBPPMBPP† GSM8K MATH- - 22.633.0†33.0 42.2 6.24
- - 32.344.4†44.4 46.4 24.3
74.1 73.1 3637.4†37.4 62.5 20.3
40.6 42.5 26.839.2†39.2 18.8 4.3
51.13 51.07 50.0047.3135.93 53.83 10.24
58.11 58.80 55.4951.0541.68 61.56 10.52
CEval CMMLU HumanEvalMBPPMBPP† GSM8K MATH- - 22.633.0†33.0 42.2 6.24
- - 32.344.4†44.4 46.4 24.3
74.1 73.1 3637.4†37.4 62.5 20.3
40.6 42.5 26.839.2†39.2 18.8 4.3
51.13 51.07 50.0047.3135.93 53.83 10.24
58.11 58.80 55.4951.0541.68 61.56 10.52