Update repo for MiniCPM3
1391  README-en.md
BIN   assets/code_interpreter.gif   (new file, 33 MiB)
BIN   assets/eval_needle.jpeg       (new file, 398 KiB)
BIN   assets/minicpm_logo.png       (new file, 618 KiB)
BIN   assets/modelbest.png          (new file, 423 KiB)
BIN   assets/thunlp.png             (new file, 24 KiB)
[Removed: several existing images under assets/ (68 KiB to 11 MiB each); their filenames are not shown in this diff view.]
186  demo/code_interpreter.py  (new file)
@@ -0,0 +1,186 @@
import contextlib
import io
import json
import os
import re
import sys
import traceback

import fire
from vllm import LLM, SamplingParams

max_turns = 5
system_prompt_template = """You are an AI Agent who is proficient in solving complicated tasks.
At each step you should write executable code to fulfill the user query. Any response without code means the task is completed and you do not have another chance to submit code.

You are equipped with a code interpreter. You can submit code and get back its execution result. You should use the code interpreter in the following format:
<|execute_start|>
```python
<your code>
```
<|execute_end|>

WARNING: Do not use cv2.waitKey(0) or cv2.destroyAllWindows()!!! Otherwise the program will be destroyed.

Each round, your answer should ALWAYS use the following format (each of your responses should contain code until you complete the task):

Analyse: (Analyse the message you received and plan what you should do)

This Step Todo: One subtask that needs to be done at this step

Code (WARNING: MAKE SURE YOUR CODE FOLLOWS THE FORMAT AND CONTAINS CODE, OR THE TASK WILL FAIL):
<|execute_start|>
```python
<your code>
```
<|execute_end|>

You will get the result of your code after each step. When the code for the previous subtask executes successfully, you can write and execute the code for the next subtask.
When all the code you wrote has been executed and the results fulfill the user query, you should summarize the previous analysis process and make a formal response to the user. The response should follow this format:
WARNING: MAKE SURE THE EXECUTION RESULTS FULFILL ALL OF THE USER'S REQUIREMENTS BEFORE USING "Finished".
Finished: <Answer to user query>

Some notes:
1. When you want to draw a plot, use plt.savefig() and print the image path in markdown format instead of plt.show()
2. Save anything to the ./output folder
3. End the process whenever you complete the task. When you do not have an Action (code), use: Finished: <summarize the analysis process and respond>
4. Do not ask for user input in your python code.
"""


def execute_code(code):
    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()

    # Note: here we simply imitate notebook output.
    # If you want to run more complex tasks, try using nbclient to run the code.
    lines = code.strip().split('\n')
    last_expr = lines[-1].strip()

    # If the last line is an assignment, print the assigned variable so the
    # model can see its value, like a notebook would.
    if '=' in last_expr:
        value = last_expr.split('=')[0].strip()
        code += f"\nprint({value})"

    with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
        try:
            # Execute the generated code in the current process.
            exec(code)
        except Exception as e:
            return {'output': stdout_capture.getvalue(), 'error': str(e)}

    return {'output': stdout_capture.getvalue(), 'error': stderr_capture.getvalue()}


class DemoLLM:
    def __init__(self, model_path):
        # Initialize default sampling parameters
        params_dict = {
            "n": 1,
            "best_of": None,
            "presence_penalty": 0.0,
            "frequency_penalty": 0.0,
            "repetition_penalty": 1.02,
            "temperature": 1.0,
            "top_p": 0.85,
            "top_k": -1,
            "use_beam_search": False,
            "length_penalty": 1.0,
            "early_stopping": False,
            "stop": None,
            "stop_token_ids": None,
            "ignore_eos": False,
            "max_tokens": 300,
            "logprobs": None,
            "prompt_logprobs": None,
            "skip_special_tokens": True,
        }

        # Create a SamplingParams object
        self.sampling_params = SamplingParams(**params_dict)

        # Initialize the language model
        self.llm = LLM(
            model=model_path,
            tensor_parallel_size=1,
            trust_remote_code=True,
            enforce_eager=True
        )

    def apply_template(self, messages):
        """Formats messages into a ChatML-style prompt string for the LLM."""
        formatted_messages = [
            f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
            for msg in messages
        ]
        formatted_messages.append("<|im_start|>assistant\n")
        return ''.join(formatted_messages)

    def generate(self, messages):
        """Generates a response from the LLM based on the input messages."""
        raw_input = self.apply_template(messages)
        response = self.llm.generate(raw_input, self.sampling_params)
        if response:
            return response[0].outputs[0].text
        return None


def extract_code(text):
    """Extracts Python code blocks from the given text."""
    # Regular expression pattern that matches fenced Python code blocks
    pattern = r'```python\s+(.*?)\s+```'
    matches = re.findall(pattern, text, re.DOTALL)

    return matches


def process(model_path):
    """
    Processes interactions with the DemoLLM using the provided model path.

    Args:
        model_path (str): The path to the language model directory.
    """
    # Initialize the language model
    llm = DemoLLM(model_path)

    # Define initial messages
    messages = [
        {"role": "system", "content": system_prompt_template},
        {"role": "user", "content": "What is 2 to the power of 100?"},
    ]

    for index in range(max_turns):
        print(f"Turn {index+1} start...")

        # Generate a response from the LLM
        raw_resp = llm.generate(messages)
        print(f"Raw response: {raw_resp}")

        # Check if the response contains the termination keyword
        if "Finished" in raw_resp:
            break

        # Extract code from the raw response
        code_list = extract_code(raw_resp)

        if not code_list:
            break

        # Execute the extracted code
        code_str = code_list[-1]
        run_result = execute_code(code_str)
        executor_response = run_result['output'] if run_result['error'] == "" else run_result['error']
        print(f"Code execution result: {run_result}")

        # Append the execution result to the messages
        messages.append({"role": "user", "content": executor_response})


if __name__ == "__main__":
    fire.Fire(process)
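For reference, a minimal sketch of how to launch this demo, assuming a vLLM-compatible MiniCPM3 checkpoint (the `openbmb/MiniCPM3-4B` path below is only an example, not something this diff specifies):

```python
# Minimal sketch: run the code-interpreter demo from the demo/ directory.
# "openbmb/MiniCPM3-4B" is an assumed checkpoint id; substitute your own path.
from code_interpreter import process

process("openbmb/MiniCPM3-4B")

# Equivalent command line, since fire.Fire(process) exposes model_path as an argument:
#   python demo/code_interpreter.py --model_path=openbmb/MiniCPM3-4B
```

The loop stops after `max_turns` rounds, or as soon as the model replies without a code block or with `Finished`.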
120  demo/function_calling.py  (new file)
@@ -0,0 +1,120 @@
#!/usr/bin/env python
# encoding: utf-8
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import json

model_path = "openbmb/MiniCPM3-4B"

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_delivery_date",
            "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {
                        "type": "string",
                        "description": "The customer's order ID.",
                    },
                },
                "required": ["order_id"],
                "additionalProperties": False,
            },
        },
    }
]
messages = [
    {
        "role": "system",
        "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user.",
    },
    {
        "role": "user",
        "content": "Hi, can you tell me the delivery date for my order? The order id is 1234 and 4321.",
    },
    # Example of the messages produced by a full tool-calling round trip:
    # {
    #     "content": "",
    #     "tool_calls": [
    #         {
    #             "type": "function",
    #             "function": {
    #                 "name": "get_delivery_date",
    #                 "arguments": {"order_id": "1234"},
    #             },
    #             "id": "call_b4ab0b4ec4b5442e86f017fe0385e22e",
    #         },
    #         {
    #             "type": "function",
    #             "function": {
    #                 "name": "get_delivery_date",
    #                 "arguments": {"order_id": "4321"},
    #             },
    #             "id": "call_628965479dd84794bbb72ab9bdda0c39",
    #         },
    #     ],
    #     "role": "assistant",
    # },
    # {
    #     "role": "tool",
    #     "content": '{"delivery_date": "2024-09-05", "order_id": "1234"}',
    #     "tool_call_id": "call_b4ab0b4ec4b5442e86f017fe0385e22e",
    # },
    # {
    #     "role": "tool",
    #     "content": '{"delivery_date": "2024-09-05", "order_id": "4321"}',
    #     "tool_call_id": "call_628965479dd84794bbb72ab9bdda0c39",
    # },
    # {
    #     "content": "Both your orders will be delivered on 2024-09-05.",
    #     "role": "assistant",
    #     "thought": "\nI have the information you need, both orders will be delivered on the same date, 2024-09-05.\n",
    # },
]
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
prompt = tokenizer.apply_chat_template(
    messages, tools=tools, tokenize=False, add_generation_prompt=True
)
llm = LLM(model_path, trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=1000)


def fake_tool_execute(toolcall):
    # Return a canned tool result for demonstration purposes.
    data = {
        "delivery_date": "2024-09-05",
        "order_id": toolcall.get("function", {})
        .get("arguments", {})
        .get("order_id", "order_id"),
    }
    return json.dumps(data)


while True:
    # Re-render the prompt with the full conversation so far.
    prompt = tokenizer.apply_chat_template(
        messages, tools=tools, tokenize=False, add_generation_prompt=True
    )
    outputs = llm.generate([prompt], sampling_params)
    response = outputs[0].outputs[0].text
    # Parse any tool calls out of the raw model response.
    msg = tokenizer.decode_function_call(response)
    if (
        "tool_calls" in msg
        and msg["tool_calls"] is not None
        and len(msg["tool_calls"]) > 0
    ):
        messages.append(msg)
        print(msg)
        for toolcall in msg["tool_calls"]:
            tool_response = fake_tool_execute(toolcall)
            tool_msg = {
                "role": "tool",
                "content": tool_response,
                "tool_call_id": toolcall["id"],
            }
            messages.append(tool_msg)
            print(tool_msg)
    else:
        messages.append(msg)
        print(msg)
        break
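The script is run directly (`python demo/function_calling.py`; the model path is hard-coded at the top). The exact message structure returned by `tokenizer.decode_function_call` comes from MiniCPM3's custom tokenizer code, but judging from the commented-out transcript above, a successful first turn should look roughly like this sketch (ids are generated at runtime and shown here only as placeholders):

```python
# Illustrative sketch of a parsed assistant message containing tool calls;
# the real field values come from tokenizer.decode_function_call() at runtime.
expected_first_turn = {
    "role": "assistant",
    "content": "",
    "tool_calls": [
        {
            "type": "function",
            "function": {"name": "get_delivery_date", "arguments": {"order_id": "1234"}},
            "id": "call_...",  # runtime-generated id
        },
        {
            "type": "function",
            "function": {"name": "get_delivery_date", "arguments": {"order_id": "4321"}},
            "id": "call_...",
        },
    ],
}
```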
@@ -1,58 +0,0 @@  (file removed)
from vllm import LLM, SamplingParams
import argparse

parser = argparse.ArgumentParser()

parser.add_argument("--model_path", type=str, default="")
parser.add_argument("--prompt_path", type=str, default="")

args = parser.parse_args()

with open(args.prompt_path, "r") as f:
    prompts = f.readlines()

prompt_template = "<用户>{}<AI>"

prompts = [prompt_template.format(prompt.strip()) for prompt in prompts]

params_dict = {
    "n": 1,
    "best_of": 1,
    "presence_penalty": 1.0,
    "frequency_penalty": 0.0,
    "temperature": 0.5,
    "top_p": 0.8,
    "top_k": -1,
    "use_beam_search": False,
    "length_penalty": 1,
    "early_stopping": False,
    "stop": None,
    "stop_token_ids": None,
    "ignore_eos": False,
    "max_tokens": 1000,
    "logprobs": None,
    "prompt_logprobs": None,
    "skip_special_tokens": True,
}

# Create a sampling params object.
sampling_params = SamplingParams(**params_dict)

# Create an LLM.
llm = LLM(model=args.model_path, tensor_parallel_size=1, dtype='bfloat16')
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
for prompt in prompts:
    outputs = llm.generate(prompt, sampling_params)
    # Print the outputs.
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print("================")
        # Find the first <用户> and remove the text before it.
        clean_prompt = prompt[prompt.find("<用户>") + len("<用户>"):]
        print(f"""<用户>: {clean_prompt.replace("<AI>", "")}""")
        print("<AI>:")
        print(generated_text)