Update repo for MiniCPM3
README-en.md: 1391 lines changed

Binary asset changes (several existing images under assets/ removed or replaced; new files added):
BIN assets/code_interpreter.gif (new file, 33 MiB)
BIN assets/eval_needle.jpeg (new file, 398 KiB)
BIN assets/minicpm_logo.png (new file, 618 KiB)
BIN assets/modelbest.png (new file, 423 KiB)
BIN assets/thunlp.png (new file, 24 KiB)
demo/code_interpreter.py (new file, 186 lines)
@@ -0,0 +1,186 @@
import contextlib
import io
import json
import os
import re
import sys
import traceback

import fire
from vllm import LLM, SamplingParams

max_turns = 5
system_prompt_template = """You are an AI Agent who is proficient in solving complicated tasks.
Each step you should write executable code to fulfill the user query. Any response without code means the task is completed and you do not have another chance to submit code.

You are equipped with a code interpreter. You can give the code and get the execution result of your code. You should use the code interpreter in the following format:
<|execute_start|>
```python
<your code>
```
<|execute_end|>

WARNING: Do not use cv2.waitKey(0) or cv2.destroyAllWindows()!!! Or the program will be destroyed.

Each round, your answer should ALWAYS use the following format (each of your responses should contain code, until you complete the task):

Analyse: (Analyse the message you received and plan what you should do)

This Step Todo: One subtask that needs to be done at this step

Code (WARNING: MAKE SURE YOUR CODE FOLLOWS THE FORMAT AND WRITE CODE, OR THE TASK WILL FAIL):
<|execute_start|>
```python
<your code>
```
<|execute_end|>

You will get the result of your code after each step. When the code of the previous subtask is executed successfully, you can write and execute the code for the next subtask.
When all the code you write has been executed and the result fulfills the user query, you should summarize the previous analysis process and make a formal response to the user. The response should follow this format:
WARNING: MAKE SURE YOU GET A CODE EXECUTION RESULT THAT FULFILLS ALL REQUIREMENTS OF THE USER BEFORE USING "Finished"
Finished: <Answer to user query>

Some notice:
1. When you want to draw a plot, use plt.savefig() and print the image path in markdown format instead of plt.show()
2. Save anything to the ./output folder
3. End the process whenever you complete the task. When you do not have an Action (Code), use: Finished: <summarize the analysis process and make a response>
4. Do not ask for user input in your python code.
"""
def execute_code(code):
    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()

    # Note: here we simply imitate notebook output by printing the value of the
    # last assignment. If you want to run more complex tasks, try using nbclient
    # to run the python code (see the sketch after this file's diff).
    lines = code.strip().split('\n')
    last_expr = lines[-1].strip()

    if '=' in last_expr:
        value = last_expr.split('=')[0].strip()
        code += f"\nprint({value})"

    with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
        try:
            # execute the extracted code in the current process
            exec(code)
        except Exception as e:
            return {'output': stdout_capture.getvalue(), 'error': str(e)}

    return {'output': stdout_capture.getvalue(), 'error': stderr_capture.getvalue()}
class DemoLLM:
    def __init__(self, model_path):
        # Initialize default sampling parameters
        params_dict = {
            "n": 1,
            "best_of": None,
            "presence_penalty": 0.0,
            "frequency_penalty": 0.0,
            "repetition_penalty": 1.02,
            "temperature": 1.0,
            "top_p": 0.85,
            "top_k": -1,
            "use_beam_search": False,
            "length_penalty": 1.0,
            "early_stopping": False,
            "stop": None,
            "stop_token_ids": None,
            "ignore_eos": False,
            "max_tokens": 300,
            "logprobs": None,
            "prompt_logprobs": None,
            "skip_special_tokens": True,
        }

        # Create a SamplingParams object
        self.sampling_params = SamplingParams(**params_dict)

        # Initialize the language model
        self.llm = LLM(
            model=model_path,
            tensor_parallel_size=1,
            trust_remote_code=True,
            enforce_eager=True
        )

    def apply_template(self, messages):
        """Formats messages into a prompt string for the LLM."""
        formatted_messages = [
            f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
            for msg in messages
        ]
        formatted_messages.append("<|im_start|>assistant\n")
        return ''.join(formatted_messages)

    def generate(self, messages):
        """Generates a response from the LLM based on the input messages."""
        raw_input = self.apply_template(messages)
        response = self.llm.generate(raw_input, self.sampling_params)
        if response:
            return response[0].outputs[0].text
        return None


def extract_code(text):
    """Extracts Python code blocks from the given text."""
    # Define a regular expression pattern to match Python code blocks
    pattern = r'```python\s+(.*?)\s+```'
    matches = re.findall(pattern, text, re.DOTALL)

    return matches
def process(model_path):
    """
    Processes interactions with the DemoLLM using the provided model path.

    Args:
        model_path (str): The path to the language model directory.
    """

    # Initialize the language model
    llm = DemoLLM(model_path)

    # Define initial messages
    messages = [
        {"role": "system", "content": system_prompt_template},
        {"role": "user", "content": "What is 2 to the power of 100?"},
    ]

    for index in range(max_turns):
        print(f"Turn {index+1} start...")

        # Generate response from the LLM
        raw_resp = llm.generate(messages)
        print(f"Raw response: {raw_resp}")

        # Check if the response contains the termination keyword
        if "Finished" in raw_resp:
            break

        # Extract code from the raw response
        code_list = extract_code(raw_resp)

        if not code_list:
            break

        # Execute the extracted code
        code_str = code_list[-1]
        run_result = execute_code(code_str)
        executor_response = run_result['output'] if run_result['error'] == "" else run_result['error']
        print(f"Code execution result: {run_result}")

        # Append the execution result to the messages
        messages.append({"role": "user", "content": executor_response})


if __name__ == "__main__":
    fire.Fire(process)
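The execute_code helper above runs model-generated code with a bare exec in the demo process and only imitates notebook output. The comment inside that function suggests nbclient for more complex tasks; below is a minimal sketch of what such a replacement could look like. The function name, timeout, and kernel name are illustrative assumptions, not part of the original demo.

# Hypothetical nbclient-based alternative to execute_code (not part of the original demo).
# It runs the generated code in a real Jupyter kernel so notebook-style output behaves
# as it would in an actual notebook.
import nbformat
from nbclient import NotebookClient


def execute_code_nbclient(code, timeout=60):
    # Build a one-cell notebook containing the generated code
    nb = nbformat.v4.new_notebook()
    nb.cells.append(nbformat.v4.new_code_cell(code))

    # Execute it in a fresh kernel; kernel_name is an assumption about the environment
    client = NotebookClient(nb, timeout=timeout, kernel_name="python3")
    try:
        client.execute()
    except Exception as e:
        return {"output": "", "error": str(e)}

    # Collect stream and result outputs into a single string, mirroring execute_code
    output_parts = []
    for out in nb.cells[0].get("outputs", []):
        if out.get("output_type") == "stream":
            output_parts.append(out.get("text", ""))
        elif out.get("output_type") == "execute_result":
            output_parts.append(out.get("data", {}).get("text/plain", ""))
    return {"output": "".join(output_parts), "error": ""}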
demo/function_calling.py (new file, 120 lines)
@@ -0,0 +1,120 @@
#!/usr/bin/env python
# encoding: utf-8
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import json

model_path = "openbmb/MiniCPM3-4B"

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_delivery_date",
            "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {
                        "type": "string",
                        "description": "The customer's order ID.",
                    },
                },
                "required": ["order_id"],
                "additionalProperties": False,
            },
        },
    }
]
messages = [
    {
        "role": "system",
        "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user.",
    },
    {
        "role": "user",
        "content": "Hi, can you tell me the delivery date for my order? The order id is 1234 and 4321.",
    },
    # Example of what an assistant tool-call turn, the tool results, and the final reply look like:
    # {
    #     "content": "",
    #     "tool_calls": [
    #         {
    #             "type": "function",
    #             "function": {
    #                 "name": "get_delivery_date",
    #                 "arguments": {"order_id": "1234"},
    #             },
    #             "id": "call_b4ab0b4ec4b5442e86f017fe0385e22e",
    #         },
    #         {
    #             "type": "function",
    #             "function": {
    #                 "name": "get_delivery_date",
    #                 "arguments": {"order_id": "4321"},
    #             },
    #             "id": "call_628965479dd84794bbb72ab9bdda0c39",
    #         },
    #     ],
    #     "role": "assistant",
    # },
    # {
    #     "role": "tool",
    #     "content": '{"delivery_date": "2024-09-05", "order_id": "1234"}',
    #     "tool_call_id": "call_b4ab0b4ec4b5442e86f017fe0385e22e",
    # },
    # {
    #     "role": "tool",
    #     "content": '{"delivery_date": "2024-09-05", "order_id": "4321"}',
    #     "tool_call_id": "call_628965479dd84794bbb72ab9bdda0c39",
    # },
    # {
    #     "content": "Both your orders will be delivered on 2024-09-05.",
    #     "role": "assistant",
    #     "thought": "\nI have the information you need, both orders will be delivered on the same date, 2024-09-05.\n",
    # },
]
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
prompt = tokenizer.apply_chat_template(
    messages, tools=tools, tokenize=False, add_generation_prompt=True
)
llm = LLM(model_path, trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=1000)
def fake_tool_execute(toolcall):
    # Stub tool backend: always returns the same delivery date for the requested order_id
    data = {
        "delivery_date": "2024-09-05",
        "order_id": toolcall.get("function", {})
        .get("arguments", {})
        .get("order_id", "order_id"),
    }
    return json.dumps(data)


while True:
    prompt = tokenizer.apply_chat_template(
        messages, tools=tools, tokenize=False, add_generation_prompt=True
    )
    outputs = llm.generate([prompt], sampling_params)
    response = outputs[0].outputs[0].text
    # decode_function_call comes with the MiniCPM3 tokenizer (loaded via trust_remote_code)
    # and parses tool calls out of the raw response text.
    msg = tokenizer.decode_function_call(response)
    if (
        "tool_calls" in msg
        and msg["tool_calls"] is not None
        and len(msg["tool_calls"]) > 0
    ):
        # The model requested one or more tool calls: run each tool and feed the results back
        messages.append(msg)
        print(msg)
        for toolcall in msg["tool_calls"]:
            tool_response = fake_tool_execute(toolcall)
            tool_msg = {
                "role": "tool",
                "content": tool_response,
                "tool_call_id": toolcall["id"],
            }
            messages.append(tool_msg)
            print(tool_msg)
    else:
        # No tool calls: the model produced its final answer, so stop the loop
        messages.append(msg)
        print(msg)
        break
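fake_tool_execute above stubs out the tool backend with a fixed delivery date. A minimal sketch of dispatching parsed tool calls to real Python functions is shown below; the registry, the get_delivery_date implementation, and the error handling are illustrative assumptions, not part of the demo.

# Hypothetical dispatch of parsed tool calls to real implementations (illustrative only).
import json


def get_delivery_date(order_id: str) -> dict:
    # Assumption: a real service would look this up in an order database
    return {"delivery_date": "2024-09-05", "order_id": order_id}


TOOL_REGISTRY = {"get_delivery_date": get_delivery_date}


def execute_tool(toolcall) -> str:
    fn = toolcall.get("function", {})
    name = fn.get("name")
    arguments = fn.get("arguments", {}) or {}
    if name not in TOOL_REGISTRY:
        return json.dumps({"error": f"unknown tool: {name}"})
    try:
        result = TOOL_REGISTRY[name](**arguments)
    except TypeError as e:  # bad or missing arguments from the model
        return json.dumps({"error": str(e)})
    return json.dumps(result)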
@@ -1,58 +0,0 @@ (existing file deleted, 58 lines removed)
from vllm import LLM, SamplingParams
import argparse

parser = argparse.ArgumentParser()

parser.add_argument("--model_path", type=str, default="")
parser.add_argument("--prompt_path", type=str, default="")

args = parser.parse_args()

with open(args.prompt_path, "r") as f:
    prompts = f.readlines()

prompt_template = "<用户>{}<AI>"

prompts = [prompt_template.format(prompt.strip()) for prompt in prompts]

params_dict = {
    "n": 1,
    "best_of": 1,
    "presence_penalty": 1.0,
    "frequency_penalty": 0.0,
    "temperature": 0.5,
    "top_p": 0.8,
    "top_k": -1,
    "use_beam_search": False,
    "length_penalty": 1,
    "early_stopping": False,
    "stop": None,
    "stop_token_ids": None,
    "ignore_eos": False,
    "max_tokens": 1000,
    "logprobs": None,
    "prompt_logprobs": None,
    "skip_special_tokens": True,
}

# Create a sampling params object.
sampling_params = SamplingParams(**params_dict)

# Create an LLM.
llm = LLM(model=args.model_path, tensor_parallel_size=1, dtype='bfloat16')
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
for prompt in prompts:
    outputs = llm.generate(prompt, sampling_params)
    # Print the outputs.
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print("================")
        # find the first <用户> and remove the text before it.
        clean_prompt = prompt[prompt.find("<用户>")+len("<用户>"):]

        print(f"""<用户>: {clean_prompt.replace("<AI>", "")}""")
        print(f"<AI>:")
        print(generated_text)
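The deleted script above formatted prompts with the legacy "<用户>{}<AI>" template. The new demos in this change rely on the tokenizer's chat template instead; a minimal sketch of the equivalent batch-inference loop, assuming the same --model_path/--prompt_path arguments and illustrative sampling values, could look like this:

# Minimal sketch: legacy <用户>/<AI> formatting replaced by the tokenizer's chat template.
import argparse

from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, default="openbmb/MiniCPM3-4B")
parser.add_argument("--prompt_path", type=str, default="")
args = parser.parse_args()

with open(args.prompt_path, "r") as f:
    user_prompts = [line.strip() for line in f if line.strip()]

tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)

# Let the chat template insert the model's special tokens instead of hand-writing them
prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": p}], tokenize=False, add_generation_prompt=True
    )
    for p in user_prompts
]

llm = LLM(model=args.model_path, trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.5, top_p=0.8, max_tokens=1000)

for user_prompt, output in zip(user_prompts, llm.generate(prompts, sampling_params)):
    print("================")
    print(f"User: {user_prompt}")
    print(f"Assistant: {output.outputs[0].text}")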