Update repo for MiniCPM3

zh-zheng 2024-09-05 17:41:40 +08:00
parent 50b5755274
commit 9a968f85fb
34 changed files with 1532 additions and 1642 deletions

README.md (1419 changed lines; diff too large to display)

BIN  assets/code_interpreter.gif (new file, 33 MiB)

BIN  assets/eval_needle.jpeg (new file, 398 KiB)

BIN  assets/minicpm_logo.png (new file, 618 KiB)

BIN  assets/modelbest.png (new file, 423 KiB)

BIN  assets/thunlp.png (new file, 24 KiB)

186
demo/code_interpreter.py Normal file

@@ -0,0 +1,186 @@
import contextlib
import io
import json
import os
import re
import sys
import traceback
import fire
from vllm import LLM, SamplingParams
max_turns = 5
system_prompt_template = """You are an AI Agent who is proficient in solving complicated tasks.
At each step you should write executable code to fulfill the user query. Any response without code means the task is completed and you do not have another chance to submit code.
You are equipped with a code interpreter. You can give the code and get the execution result of your code. You should use the code interpreter in the following format:
<|execute_start|>
```python
<your code>
```
<|execute_end|>
WARNING: Do not use cv2.waitKey(0) or cv2.destroyAllWindows()!!! Or the program will be destroyed.
Each round, your answer should ALWAYS use the following format (each of your responses should contain code, until you complete the task):
Analyse: (analyse the message you received and plan what you should do)
This Step Todo: one subtask that needs to be done at this step
Code (WARNING: MAKE SURE YOUR CODE FOLLOWS THE FORMAT AND THAT YOU WRITE CODE, OR THE TASK WILL FAIL):
<|execute_start|>
```python
<your code>
```
<|execute_end|>
You will get the result of your code after each step. When the code of the previous subtask has executed successfully, you can write and execute the code for the next subtask.
When all the code you have written has executed and its results fulfill the user query, you should summarize the previous analysis process and make a formal response to the user. The response should follow this format:
WARNING: MAKE SURE YOU HAVE EXECUTION RESULTS THAT FULFILL ALL REQUIREMENTS OF THE USER BEFORE USING "Finished"
Finished: <Answer to user query>
Some notices:
1. When you want to draw a plot, use plt.savefig() and print the image path in markdown format instead of plt.show().
2. Save everything to the ./output folder.
3. End the process whenever you complete the task. When you do not have an Action (Code), use: Finished: <summarize the analysis process and make a response>
4. Do not ask for user input in your Python code.
"""
def execute_code(code):
stdout_capture = io.StringIO()
stderr_capture = io.StringIO()
# Note: here we simply imitate notebook output by printing the value of a trailing assignment.
# If you want to run more complex tasks, consider using nbclient to run the Python code.
lines = code.strip().split('\n')
last_expr = lines[-1].strip()
if '=' in last_expr:
value = last_expr.split('=')[0].strip()
code += f"\nprint({value})"
with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
try:
# execute code here
exec(code)
except Exception as e:
return {'output': stdout_capture.getvalue(), 'error': str(e)}
return {'output': stdout_capture.getvalue(), 'error': stderr_capture.getvalue()}
class DemoLLM:
def __init__(self, model_path):
# Initialize default sampling parameters
params_dict = {
"n": 1,
"best_of": None,
"presence_penalty": 0.0,
"frequency_penalty": 0.0,
"repetition_penalty": 1.02,
"temperature": 1.0,
"top_p": 0.85,
"top_k": -1,
"use_beam_search": False,
"length_penalty": 1.0,
"early_stopping": False,
"stop": None,
"stop_token_ids": None,
"ignore_eos": False,
"max_tokens": 300,
"logprobs": None,
"prompt_logprobs": None,
"skip_special_tokens": True,
}
# Create a SamplingParams object
self.sampling_params = SamplingParams(**params_dict)
# Initialize the language model
self.llm = LLM(
model=model_path,
tensor_parallel_size=1,
trust_remote_code=True,
enforce_eager=True
)
def apply_template(self, messages):
"""Formats messages into a prompt string for the LLM."""
formatted_messages = [
f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
for msg in messages
]
formatted_messages.append("<|im_start|>assistant\n")
return ''.join(formatted_messages)
def generate(self, messages):
"""Generates a response from the LLM based on the input messages."""
raw_input = self.apply_template(messages)
response = self.llm.generate(raw_input, self.sampling_params)
if response:
return response[0].outputs[0].text
return None
def extract_code(text):
""" Extracts Python code blocks from the given text. """
# Define a regular expression pattern to match Python code blocks
pattern = r'```python\s+(.*?)\s+```'
matches = re.findall(pattern, text, re.DOTALL)
return matches
def process(model_path):
"""
Processes interactions with the DemoLLM using provided model path.
Args:
model_path (str): The path to the language model directory.
"""
# Initialize the language model
llm = DemoLLM(model_path)
# Define initial messages
messages = [
{"role": "system", "content": system_prompt_template},
{"role": "user", "content": "2 的 100 次方是多少?"},
]
for index in range(max_turns):
print(f"Turn {index+1} start...")
# Generate response from the LLM
raw_resp = llm.generate(messages)
print(f"Raw response: {raw_resp}")
# Check if the response contains the termination keyword
if "Finished" in raw_resp:
break
# Extract code from the raw response
code_list = extract_code(raw_resp)
if not code_list:
break
# Execute the extracted code
code_str = code_list[-1]
run_result = execute_code(code_str)
executor_response = run_result['output'] if run_result['error'] == "" else run_result['error']
print(f"Code execution result: {run_result}")
# Append the execution result to the messages
messages.append({"role": "user", "content": executor_response})
if __name__ == "__main__":
fire.Fire(process)
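For reference, below is a minimal self-contained sketch (not part of the commit) of the extract-then-execute round trip that demo/code_interpreter.py performs. The model reply is hand-written, and the snippet only mirrors the regex and the trailing-assignment heuristic used above; from a shell the demo itself would be launched via fire, roughly as python demo/code_interpreter.py --model_path <path-to-model>.

import contextlib
import io
import re

# Hand-written stand-in for a model reply in the format requested by the system prompt.
reply = (
    "Analyse: compute 2 to the power of 100.\n"
    "<|execute_start|>\n"
    "```python\n"
    "result = 2 ** 100\n"
    "```\n"
    "<|execute_end|>"
)

# Same regex as extract_code, taking the last fenced Python block.
code = re.findall(r"```python\s+(.*?)\s+```", reply, re.DOTALL)[-1]

# Same heuristic as execute_code: echo the value of a trailing assignment.
last_line = code.strip().split("\n")[-1]
if "=" in last_line:
    code += f"\nprint({last_line.split('=')[0].strip()})"

buf = io.StringIO()
with contextlib.redirect_stdout(buf):
    exec(code)
print(buf.getvalue())  # 1267650600228229401496703205376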

120
demo/function_calling.py Normal file

@@ -0,0 +1,120 @@
#!/usr/bin/env python
# encoding: utf-8
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import json
model_path = "openbmb/MiniCPM3-4B"
tools = [
{
"type": "function",
"function": {
"name": "get_delivery_date",
"description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'",
"parameters": {
"type": "object",
"properties": {
"order_id": {
"type": "string",
"description": "The customer's order ID.",
},
},
"required": ["order_id"],
"additionalProperties": False,
},
},
}
]
messages = [
{
"role": "system",
"content": "You are a helpful customer support assistant. Use the supplied tools to assist the user.",
},
{
"role": "user",
"content": "Hi, can you tell me the delivery date for my order? The order id is 1234 and 4321.",
},
# {
# "content": "",
# "tool_calls": [
# {
# "type": "function",
# "function": {
# "name": "get_delivery_date",
# "arguments": {"order_id": "1234"},
# },
# "id": "call_b4ab0b4ec4b5442e86f017fe0385e22e",
# },
# {
# "type": "function",
# "function": {
# "name": "get_delivery_date",
# "arguments": {"order_id": "4321"},
# },
# "id": "call_628965479dd84794bbb72ab9bdda0c39",
# },
# ],
# "role": "assistant",
# },
# {
# "role": "tool",
# "content": '{"delivery_date": "2024-09-05", "order_id": "1234"}',
# "tool_call_id": "call_b4ab0b4ec4b5442e86f017fe0385e22e",
# },
# {
# "role": "tool",
# "content": '{"delivery_date": "2024-09-05", "order_id": "4321"}',
# "tool_call_id": "call_628965479dd84794bbb72ab9bdda0c39",
# },
# {
# "content": "Both your orders will be delivered on 2024-09-05.",
# "role": "assistant",
# "thought": "\nI have the information you need, both orders will be delivered on the same date, 2024-09-05.\n",
# },
]
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
prompt = tokenizer.apply_chat_template(
messages, tools=tools, tokenize=False, add_generation_prompt=True
)
llm = LLM(model_path, trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=1000)
def fake_tool_execute(toolcall):
data = {
"delivery_date": "2024-09-05",
"order_id": toolcall.get("function", {})
.get("arguments", {})
.get("order_id", "order_id"),
}
return json.dumps(data)
while True:
prompt = tokenizer.apply_chat_template(
messages, tools=tools, tokenize=False, add_generation_prompt=True
)
outputs = llm.generate([prompt], sampling_params)
response = outputs[0].outputs[0].text
msg = tokenizer.decode_function_call(response)
if (
"tool_calls" in msg
and msg["tool_calls"] is not None
and len(msg["tool_calls"]) > 0
):
messages.append(msg)
print(msg)
for toolcall in msg["tool_calls"]:
tool_response = fake_tool_execute(toolcall)
tool_msg = {
"role": "tool",
"content": tool_response,
"tool_call_id": toolcall["id"],
}
messages.append(tool_msg)
print(tool_msg)
else:
messages.append(msg)
print(msg)
break
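The demo stubs tool execution with fake_tool_execute. Below is a hedged sketch (not part of the commit) of dispatching a parsed tool call to a real Python function by name; the get_delivery_date implementation, TOOL_REGISTRY, and the example call id are illustrative assumptions.

import json

# Illustrative tool implementation and dispatch table (assumptions, not in the commit).
def get_delivery_date(order_id: str) -> dict:
    return {"order_id": order_id, "delivery_date": "2024-09-05"}

TOOL_REGISTRY = {"get_delivery_date": get_delivery_date}

def execute_tool(toolcall: dict) -> str:
    """Look up the requested function by name and JSON-encode its result."""
    fn = TOOL_REGISTRY[toolcall["function"]["name"]]
    args = toolcall["function"].get("arguments", {}) or {}
    return json.dumps(fn(**args))

# A tool call in the shape the loop above assumes for msg["tool_calls"] entries.
print(execute_tool({
    "type": "function",
    "id": "call_demo",
    "function": {"name": "get_delivery_date", "arguments": {"order_id": "1234"}},
}))  # {"order_id": "1234", "delivery_date": "2024-09-05"}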


@@ -1,58 +0,0 @@
from vllm import LLM, SamplingParams
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, default="")
parser.add_argument("--prompt_path", type=str, default="")
args = parser.parse_args()
with open(args.prompt_path, "r") as f:
prompts = f.readlines()
prompt_template = "<用户>{}<AI>"
prompts = [prompt_template.format(prompt.strip()) for prompt in prompts]
params_dict = {
"n": 1,
"best_of": 1,
"presence_penalty": 1.0,
"frequency_penalty": 0.0,
"temperature": 0.5,
"top_p": 0.8,
"top_k": -1,
"use_beam_search": False,
"length_penalty": 1,
"early_stopping": False,
"stop": None,
"stop_token_ids": None,
"ignore_eos": False,
"max_tokens": 1000,
"logprobs": None,
"prompt_logprobs": None,
"skip_special_tokens": True,
}
# Create a sampling params object.
sampling_params = SamplingParams(**params_dict)
# Create an LLM.
llm = LLM(model=args.model_path, tensor_parallel_size=1, dtype='bfloat16')
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
for prompt in prompts:
outputs = llm.generate(prompt, sampling_params)
# Print the outputs.
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
print("================")
# find the first <用户> and remove the text before it.
clean_prompt = prompt[prompt.find("<用户>")+len("<用户>"):]
print(f"""<用户>: {clean_prompt.replace("<AI>", "")}""")
print(f"<AI>:")
print(generated_text)
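The removed script above relies on the legacy "<用户>{}<AI>" prompt template. A hedged sketch of the equivalent single-turn flow using the tokenizer's chat template, mirroring the pattern in demo/function_calling.py from this commit; the model path and the user prompt are illustrative assumptions.

from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Illustrative replacement for the removed script, mirroring the chat-template usage
# in demo/function_calling.py; model path and prompt are assumptions, not from the diff.
model_path = "openbmb/MiniCPM3-4B"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
llm = LLM(model=model_path, trust_remote_code=True, tensor_parallel_size=1)
sampling_params = SamplingParams(temperature=0.5, top_p=0.8, max_tokens=1000)

messages = [{"role": "user", "content": "Introduce MiniCPM3 in one sentence."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = llm.generate([prompt], sampling_params)
print(outputs[0].outputs[0].text)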