From 07a0555016a967abbddfeda26354b750a745eea5 Mon Sep 17 00:00:00 2001
From: liam
Date: Tue, 18 Feb 2025 12:52:17 +0800
Subject: [PATCH] :zap: fix device and add test

---
 .gitignore                                    |   4 +
 Makefile                                      |   6 +-
 .../backend/interfaces/ktransformers.py      |   3 +
 ktransformers/tests/mmlu_test.py              | 195 ++++++++++++++++++
 4 files changed, 207 insertions(+), 1 deletion(-)
 create mode 100644 ktransformers/tests/mmlu_test.py

diff --git a/.gitignore b/.gitignore
index 1631d01..a951948 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,7 @@ tmp1.txt
 test_65_300_1536.txt
 test.txt
 book
+ktransformers/tests/mmlu_result_silicon.json
+ktransformers/tests/chat_txt.txt
+mmlu_result_q4km.json
+mmlu_result_q4km.log
diff --git a/Makefile b/Makefile
index f8633a9..1602eef 100644
--- a/Makefile
+++ b/Makefile
@@ -18,4 +18,8 @@ dev_install:
 	echo "Installing ktransformers"
 	KTRANSFORMERS_FORCE_BUILD=TRUE pip install -e . -v --no-build-isolation
-	echo "Installation completed successfully"
\ No newline at end of file
+	echo "Installation completed successfully"
+install_numa:
+	USE_NUMA=1 make dev_install
+install_no_numa:
+	env -u USE_NUMA make dev_install
diff --git a/ktransformers/server/backend/interfaces/ktransformers.py b/ktransformers/server/backend/interfaces/ktransformers.py
index d4f5562..34f18b9 100644
--- a/ktransformers/server/backend/interfaces/ktransformers.py
+++ b/ktransformers/server/backend/interfaces/ktransformers.py
@@ -78,6 +78,7 @@ class KTransformersInterface(TransformersInterface):
         torch_device = get_device("blk.0.self_attn", device_map)
         torch_device = "cuda:0" if torch_device == "cuda" else torch_device
         global warm_uped
+        torch.cuda.set_device(torch_device)
         if self.args.use_cuda_graph and warm_uped == True:
             if not hasattr(self, "cuda_graph_runner"):
@@ -130,6 +131,7 @@ class KTransformersInterface(TransformersInterface):
         logger.debug(f"input_ids: {input_ids.shape}")
         device = self.device_map.get("blk.0.self_attn", {}).get("generate_device", "cuda:0")
+        device = "cuda:0" if device == "cuda" else device
         if is_new:
             self.cache.reset()
@@ -161,6 +163,7 @@ class KTransformersInterface(TransformersInterface):
         if not (type(self) is TransformersInterface):
             input_ids = input_ids.to("cpu")
         inputs_embeds = self.model.model.embed_tokens(input_ids).to(device)
+        torch.cuda.set_device(device)
         if self.use_static_cache:
             logits = self.model(
                 inputs_embeds=inputs_embeds,
diff --git a/ktransformers/tests/mmlu_test.py b/ktransformers/tests/mmlu_test.py
new file mode 100644
index 0000000..ea1ca78
--- /dev/null
+++ b/ktransformers/tests/mmlu_test.py
@@ -0,0 +1,195 @@
+import argparse
+import random
+import time
+import json
+import requests
+import pandas as pd
+from datasets import load_dataset
+
+import os
+os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
+os.environ['https_proxy'] = ''
+os.environ['http_proxy'] = ''
+hint = 'There is a single choice question. Answer the question by replying A, B, C, D. No other answers are accepted. Just the letter.'
+
+
+class DataEvaluator:
+    def __init__(self):
+        # self.template_prompt = template_prompt
+        self.data = []
+
+    def load_data(self, file_path):
+        """
+        Load the MMLU dataset ("all" config, test split) into a list.
+        Each row of the test split becomes one record (dict).
+ """ + # 读取 Parquet 文件 + # dataset = load_dataset('parquet', data_files=file_path) + ds = load_dataset(file_path,"all") + df = pd.DataFrame(ds['test']) + # print(ds) + # # ds_1 = ds['train'] + # ds_2 = ds['validation'] + # ds_3 = ds['test'] + # # 将数据集转换为 Pandas DataFrame + # df_test = pd.DataFrame(ds['test']) + # df_val = pd.DataFrame(ds['validation']) + + # for _, row in df.iterrows(): + # self.data.append(row.to_dict()) + # df = pd.read_parquet(file_path) + + for _, row in df.iterrows(): + self.data.append(row.to_dict()) + + def get_prompt(self, record): + """ + Combine fields from a record with the template prompt to create a full prompt. + :param record: Dictionary containing fields to populate the template. + :return: A formatted prompt string. + """ + # 查看ABCD。。。的选项 + options_str = "\n".join([f"{chr(65 + i)}. {opt}" for i, opt in enumerate(record['choices'])]) + prompt = hint + "\nQuestion: " + record['question'] + "\n" + options_str + "\nAnswer: '" + return prompt + + def post_processing(self, text): + """ + Perform post-processing on the prediction string. + :param text: The raw prediction string. + :return: Processed prediction string. + """ + text = text.lstrip('\n').split('\n')[0] + return text[:1] + + def score(self, pred, answers): + """ + Calculate scores between the prediction and the answer. + Uses ROUGE scores as the evaluation metric. + :param pred: The predicted string. + :param answer: The reference answer string. + :return: A dictionary containing ROUGE scores. + """ + for answer in answers: + if pred == answer: + return 1 + + return 0 + +# Function to generate text using API +def generate_text(api_url, question, model_name, stream=False): + headers = { + 'accept': 'application/json', + 'Content-Type': 'application/json', + # 添加 API Key + 'Authorization' : 'Bearer ' + } + data = { + "messages": [{"content": question, "role": "user"}], + "model": model_name, + "stream": stream, + "temperature": 0.6 + } + + print("POST data:", data) + response = requests.post(api_url, headers=headers, json=data) + + if response.status_code == 200: + result = response.json() + return result.get('choices', [{}])[0].get('message', {}).get('content', '').strip() + else: + print(f"API Request failed with status code {response.status_code}") + return None + +# Main function to handle multiple evaluations +def main(concurrent_requests, data_evaluator: DataEvaluator, result_file, log_file, api_url, model_name): + start_total_time = time.time() + + total_score = 0 + + results = [] + # 设置随机数种子 + random.seed(42) + random.shuffle(data_evaluator.data) + for i in range(min(concurrent_requests, len(data_evaluator.data))): + # Randomly select a data item from data for each request + data_item = data_evaluator.data[i] + question = data_evaluator.get_prompt(data_item) + # print(question) + + # Start the timer for this evaluation + start_time = time.time() + try: + # Generate prediction using the API + prediction = generate_text(api_url, question, model_name) + + if prediction is None: + raise Exception(f"Failed to get prediction for {question}") + + answer = chr(data_item['answer'] + 65) + # Compute score + score = data_evaluator.score(data_evaluator.post_processing(prediction), answer) + + # Calculate the time taken + elapsed_time = time.time() - start_time + + # Collect the result data + result_data = { + "question_id": i, + "answer": answer, + "prediction": data_evaluator.post_processing(prediction), + "score": score, + "time": elapsed_time + } + + # Write results to result.json with each field on a 
+            with open(result_file, 'a', encoding='utf-8') as f:
+                json.dump(result_data, f, ensure_ascii=False, indent=4)
+                f.write("\n")  # Ensure each JSON object ends with a newline
+
+            results.append(result_data)
+
+            # Aggregate scores
+            total_score += score
+
+        except Exception as e:
+            print(f"Error processing request {i}: {e}")
+
+    # Calculate total time and throughput
+    total_time = time.time() - start_total_time
+    throughput = concurrent_requests / total_time
+
+    # Log the total time, throughput, and average score
+    with open(log_file, 'a', encoding='utf-8') as log_f:
+        log_f.write(f"Total Time: {total_time:.2f} seconds\n")
+        log_f.write(f"Throughput: {throughput:.2f} requests per second\n")
+        log_f.write(f"Average Scores: {total_score / concurrent_requests}\n")
+        log_f.write('-' * 40 + '\n')
+
+    print(f"Results saved to {result_file}")
+    print(f"Log saved to {log_file}")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="API Generate Tester")
+    parser.add_argument("--concurrent", type=int, default=1000, help="Number of evaluations to run")
+    parser.add_argument("--file", type=str, default="cais/mmlu", help="Hugging Face dataset name (or local path) for MMLU")
+    parser.add_argument("--result", type=str, default="./mmlu_result_silicon.json", help="Path to save the result JSON file")
+    parser.add_argument("--log", type=str, default="./mmlu_result_silicon.log", help="Path to save the log file")
+    parser.add_argument("--model", type=str, default="Pro/deepseek-ai/DeepSeek-V3", help="Model name or path")
+    parser.add_argument("--api_url", type=str, default="http://localhost:10003/v1/chat/completions", help="API URL")
+    # parser.add_argument("--api_url", type=str, default="https://api.siliconflow.cn/v1/chat/completions", help="API URL")
+
+    args = parser.parse_args()
+
+    # Alternative prompt templates kept for reference
+    # template_prompt = hint + "\nQuestion: {question}\nA. {options}\nB. {option_b}\nC. {option_c}\nD. {option_d}\nAnswer: '"
+    # template_prompt_pro = hint + "\nQuestion: {question}\nA. {options[0]}\nB. {options[1]}\nC. {options[2]}\nD. {options[3]}\nE. {options[4]}\nF. {options[5]}\nG. \
+    #     {options[6]}\nH. {options[7]}\nI. {options[8]}\nJ. {options[9]}\nAnswer: '"
+
+
+    # Load the data from the provided file
+    data_evaluator = DataEvaluator()
+    data_evaluator.load_data(args.file)
+
+    # Run the main function with the specified number of evaluations
+    main(args.concurrent, data_evaluator, args.result, args.log, args.api_url, args.model)
\ No newline at end of file
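
Note for reviewers (not part of the patch): a minimal sketch of how the new test helpers can be exercised interactively. It assumes ktransformers/tests is importable as a package and that a ktransformers server is already serving chat completions on localhost:10003; the model name below is illustrative.

    from ktransformers.tests.mmlu_test import DataEvaluator, generate_text

    # Load the MMLU test split and build a prompt for the first record.
    evaluator = DataEvaluator()
    evaluator.load_data("cais/mmlu")
    record = evaluator.data[0]
    prompt = evaluator.get_prompt(record)

    # Query the local endpoint and score the single-letter reply.
    reply = generate_text("http://localhost:10003/v1/chat/completions", prompt, "DeepSeek-V3")
    if reply is not None:
        pred = evaluator.post_processing(reply)   # keep only the leading answer letter
        gold = chr(65 + record["answer"])         # the dataset stores the answer as an index
        print(pred, gold, evaluator.score(pred, gold))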