Update repo for MiniCPM3
README-en.md: 1391 lines changed

Binary asset changes (several existing images under assets/ removed or replaced; new files added):
BIN assets/code_interpreter.gif (new file, 33 MiB)
BIN assets/eval_needle.jpeg (new file, 398 KiB)
BIN assets/minicpm_logo.png (new file, 618 KiB)
BIN assets/modelbest.png (new file, 423 KiB)
BIN assets/thunlp.png (new file, 24 KiB)
demo/code_interpreter.py (new file, 186 lines)
@@ -0,0 +1,186 @@
import contextlib
import io
import json
import os
import re
import sys
import traceback

import fire
from vllm import LLM, SamplingParams

max_turns = 5
system_prompt_template = """You are an AI Agent who is proficient in solving complicated tasks.
Each step you should write executable code to fulfill the user query. Any response without code means the task is completed and you do not have another chance to submit code.

You are equipped with a code interpreter. You can give the code and get the execution result of your code. You should use the code interpreter in the following format:
<|execute_start|>
```python
<your code>
```
<|execute_end|>

WARNING: Do not use cv2.waitKey(0) or cv2.destroyAllWindows()!!! Or the program will be destroyed.

Each round, your answer should ALWAYS use the following format (each of your responses should contain code, until you complete the task):

Analyse: (Analyse the message you received and plan what you should do)

This Step Todo: One subtask that needs to be done at this step

Code (WARNING: MAKE SURE YOUR CODE FOLLOWS THE FORMAT AND WRITE CODE, OR THE TASK WILL FAIL):
<|execute_start|>
```python
<your code>
```
<|execute_end|>

You will get the result of your code after each step. When the code of the previous subtask is executed successfully, you can write and execute the code for the next subtask.
When all the code you write has been executed and the result fulfills the user query, you should summarize the previous analysis process and make a formal response to the user. The response should follow this format:
WARNING: MAKE SURE YOU GET A CODE EXECUTION RESULT THAT FULFILLS ALL REQUIREMENTS OF THE USER BEFORE USING "Finished"
Finished: <Answer to user query>

Some notice:
1. When you want to draw a plot, use plt.savefig() and print the image path in markdown format instead of plt.show()
2. Save anything to the ./output folder
3. End the process whenever you complete the task. When you do not have an Action (Code), use: Finished: <summarize the analysis process and make a response>
4. Do not ask for user input in your python code.
"""
def execute_code(code):
    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()

    # Note: here we simply imitate notebook output by printing the value of the
    # last assignment. If you want to run more complex tasks, try using nbclient
    # to run the python code (see the sketch after this file's diff).
    lines = code.strip().split('\n')
    last_expr = lines[-1].strip()

    if '=' in last_expr:
        value = last_expr.split('=')[0].strip()
        code += f"\nprint({value})"

    with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
        try:
            # execute the extracted code in the current process
            exec(code)
        except Exception as e:
            return {'output': stdout_capture.getvalue(), 'error': str(e)}

    return {'output': stdout_capture.getvalue(), 'error': stderr_capture.getvalue()}
class DemoLLM:
    def __init__(self, model_path):
        # Initialize default sampling parameters
        params_dict = {
            "n": 1,
            "best_of": None,
            "presence_penalty": 0.0,
            "frequency_penalty": 0.0,
            "repetition_penalty": 1.02,
            "temperature": 1.0,
            "top_p": 0.85,
            "top_k": -1,
            "use_beam_search": False,
            "length_penalty": 1.0,
            "early_stopping": False,
            "stop": None,
            "stop_token_ids": None,
            "ignore_eos": False,
            "max_tokens": 300,
            "logprobs": None,
            "prompt_logprobs": None,
            "skip_special_tokens": True,
        }

        # Create a SamplingParams object
        self.sampling_params = SamplingParams(**params_dict)

        # Initialize the language model
        self.llm = LLM(
            model=model_path,
            tensor_parallel_size=1,
            trust_remote_code=True,
            enforce_eager=True
        )

    def apply_template(self, messages):
        """Formats messages into a prompt string for the LLM."""
        formatted_messages = [
            f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
            for msg in messages
        ]
        formatted_messages.append("<|im_start|>assistant\n")
        return ''.join(formatted_messages)

    def generate(self, messages):
        """Generates a response from the LLM based on the input messages."""
        raw_input = self.apply_template(messages)
        response = self.llm.generate(raw_input, self.sampling_params)
        if response:
            return response[0].outputs[0].text
        return None


def extract_code(text):
    """Extracts Python code blocks from the given text."""
    # Define a regular expression pattern to match Python code blocks
    pattern = r'```python\s+(.*?)\s+```'
    matches = re.findall(pattern, text, re.DOTALL)

    return matches
def process(model_path):
    """
    Processes interactions with the DemoLLM using the provided model path.

    Args:
        model_path (str): The path to the language model directory.
    """

    # Initialize the language model
    llm = DemoLLM(model_path)

    # Define initial messages
    messages = [
        {"role": "system", "content": system_prompt_template},
        {"role": "user", "content": "What is 2 to the power of 100?"},
    ]

    for index in range(max_turns):
        print(f"Turn {index+1} start...")

        # Generate response from the LLM
        raw_resp = llm.generate(messages)
        print(f"Raw response: {raw_resp}")

        # Check if the response contains the termination keyword
        if "Finished" in raw_resp:
            break

        # Extract code from the raw response
        code_list = extract_code(raw_resp)

        if not code_list:
            break

        # Execute the extracted code
        code_str = code_list[-1]
        run_result = execute_code(code_str)
        executor_response = run_result['output'] if run_result['error'] == "" else run_result['error']
        print(f"Code execution result: {run_result}")

        # Append the execution result to the messages
        messages.append({"role": "user", "content": executor_response})


if __name__ == "__main__":
    fire.Fire(process)
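The execute_code helper above runs model-generated code with a bare exec in the demo process and only imitates notebook output. The comment inside that function suggests nbclient for more complex tasks; below is a minimal sketch of what such a replacement could look like. The function name, timeout, and kernel name are illustrative assumptions, not part of the original demo.

# Hypothetical nbclient-based alternative to execute_code (not part of the original demo).
# It runs the generated code in a real Jupyter kernel so notebook-style output behaves
# as it would in an actual notebook.
import nbformat
from nbclient import NotebookClient


def execute_code_nbclient(code, timeout=60):
    # Build a one-cell notebook containing the generated code
    nb = nbformat.v4.new_notebook()
    nb.cells.append(nbformat.v4.new_code_cell(code))

    # Execute it in a fresh kernel; kernel_name is an assumption about the environment
    client = NotebookClient(nb, timeout=timeout, kernel_name="python3")
    try:
        client.execute()
    except Exception as e:
        return {"output": "", "error": str(e)}

    # Collect stream and result outputs into a single string, mirroring execute_code
    output_parts = []
    for out in nb.cells[0].get("outputs", []):
        if out.get("output_type") == "stream":
            output_parts.append(out.get("text", ""))
        elif out.get("output_type") == "execute_result":
            output_parts.append(out.get("data", {}).get("text/plain", ""))
    return {"output": "".join(output_parts), "error": ""}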
demo/function_calling.py (new file, 120 lines)
@@ -0,0 +1,120 @@
#!/usr/bin/env python
# encoding: utf-8
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import json

model_path = "openbmb/MiniCPM3-4B"

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_delivery_date",
            "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {
                        "type": "string",
                        "description": "The customer's order ID.",
                    },
                },
                "required": ["order_id"],
                "additionalProperties": False,
            },
        },
    }
]
messages = [
    {
        "role": "system",
        "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user.",
    },
    {
        "role": "user",
        "content": "Hi, can you tell me the delivery date for my order? The order id is 1234 and 4321.",
    },
    # Example of what an assistant tool-call turn, the tool results, and the final reply look like:
    # {
    #     "content": "",
    #     "tool_calls": [
    #         {
    #             "type": "function",
    #             "function": {
    #                 "name": "get_delivery_date",
    #                 "arguments": {"order_id": "1234"},
    #             },
    #             "id": "call_b4ab0b4ec4b5442e86f017fe0385e22e",
    #         },
    #         {
    #             "type": "function",
    #             "function": {
    #                 "name": "get_delivery_date",
    #                 "arguments": {"order_id": "4321"},
    #             },
    #             "id": "call_628965479dd84794bbb72ab9bdda0c39",
    #         },
    #     ],
    #     "role": "assistant",
    # },
    # {
    #     "role": "tool",
    #     "content": '{"delivery_date": "2024-09-05", "order_id": "1234"}',
    #     "tool_call_id": "call_b4ab0b4ec4b5442e86f017fe0385e22e",
    # },
    # {
    #     "role": "tool",
    #     "content": '{"delivery_date": "2024-09-05", "order_id": "4321"}',
    #     "tool_call_id": "call_628965479dd84794bbb72ab9bdda0c39",
    # },
    # {
    #     "content": "Both your orders will be delivered on 2024-09-05.",
    #     "role": "assistant",
    #     "thought": "\nI have the information you need, both orders will be delivered on the same date, 2024-09-05.\n",
    # },
]
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
prompt = tokenizer.apply_chat_template(
    messages, tools=tools, tokenize=False, add_generation_prompt=True
)
llm = LLM(model_path, trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=1000)
def fake_tool_execute(toolcall):
    # Stub tool backend: always returns the same delivery date for the requested order_id
    data = {
        "delivery_date": "2024-09-05",
        "order_id": toolcall.get("function", {})
        .get("arguments", {})
        .get("order_id", "order_id"),
    }
    return json.dumps(data)


while True:
    prompt = tokenizer.apply_chat_template(
        messages, tools=tools, tokenize=False, add_generation_prompt=True
    )
    outputs = llm.generate([prompt], sampling_params)
    response = outputs[0].outputs[0].text
    # decode_function_call comes with the MiniCPM3 tokenizer (loaded via trust_remote_code)
    # and parses tool calls out of the raw response text.
    msg = tokenizer.decode_function_call(response)
    if (
        "tool_calls" in msg
        and msg["tool_calls"] is not None
        and len(msg["tool_calls"]) > 0
    ):
        # The model requested one or more tool calls: run each tool and feed the results back
        messages.append(msg)
        print(msg)
        for toolcall in msg["tool_calls"]:
            tool_response = fake_tool_execute(toolcall)
            tool_msg = {
                "role": "tool",
                "content": tool_response,
                "tool_call_id": toolcall["id"],
            }
            messages.append(tool_msg)
            print(tool_msg)
    else:
        # No tool calls: the model produced its final answer, so stop the loop
        messages.append(msg)
        print(msg)
        break
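fake_tool_execute above stubs out the tool backend with a fixed delivery date. A minimal sketch of dispatching parsed tool calls to real Python functions is shown below; the registry, the get_delivery_date implementation, and the error handling are illustrative assumptions, not part of the demo.

# Hypothetical dispatch of parsed tool calls to real implementations (illustrative only).
import json


def get_delivery_date(order_id: str) -> dict:
    # Assumption: a real service would look this up in an order database
    return {"delivery_date": "2024-09-05", "order_id": order_id}


TOOL_REGISTRY = {"get_delivery_date": get_delivery_date}


def execute_tool(toolcall) -> str:
    fn = toolcall.get("function", {})
    name = fn.get("name")
    arguments = fn.get("arguments", {}) or {}
    if name not in TOOL_REGISTRY:
        return json.dumps({"error": f"unknown tool: {name}"})
    try:
        result = TOOL_REGISTRY[name](**arguments)
    except TypeError as e:  # bad or missing arguments from the model
        return json.dumps({"error": str(e)})
    return json.dumps(result)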
@@ -1,58 +0,0 @@ (existing file deleted, 58 lines removed)
from vllm import LLM, SamplingParams
import argparse

parser = argparse.ArgumentParser()

parser.add_argument("--model_path", type=str, default="")
parser.add_argument("--prompt_path", type=str, default="")

args = parser.parse_args()

with open(args.prompt_path, "r") as f:
    prompts = f.readlines()

prompt_template = "<用户>{}<AI>"

prompts = [prompt_template.format(prompt.strip()) for prompt in prompts]

params_dict = {
    "n": 1,
    "best_of": 1,
    "presence_penalty": 1.0,
    "frequency_penalty": 0.0,
    "temperature": 0.5,
    "top_p": 0.8,
    "top_k": -1,
    "use_beam_search": False,
    "length_penalty": 1,
    "early_stopping": False,
    "stop": None,
    "stop_token_ids": None,
    "ignore_eos": False,
    "max_tokens": 1000,
    "logprobs": None,
    "prompt_logprobs": None,
    "skip_special_tokens": True,
}

# Create a sampling params object.
sampling_params = SamplingParams(**params_dict)

# Create an LLM.
llm = LLM(model=args.model_path, tensor_parallel_size=1, dtype='bfloat16')
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
for prompt in prompts:
    outputs = llm.generate(prompt, sampling_params)
    # Print the outputs.
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print("================")
        # find the first <用户> and remove the text before it.
        clean_prompt = prompt[prompt.find("<用户>")+len("<用户>"):]

        print(f"""<用户>: {clean_prompt.replace("<AI>", "")}""")
        print(f"<AI>:")
        print(generated_text)
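The deleted script above formatted prompts with the legacy "<用户>{}<AI>" template. The new demos in this change rely on the tokenizer's chat template instead; a minimal sketch of the equivalent batch-inference loop, assuming the same --model_path/--prompt_path arguments and illustrative sampling values, could look like this:

# Minimal sketch: legacy <用户>/<AI> formatting replaced by the tokenizer's chat template.
import argparse

from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, default="openbmb/MiniCPM3-4B")
parser.add_argument("--prompt_path", type=str, default="")
args = parser.parse_args()

with open(args.prompt_path, "r") as f:
    user_prompts = [line.strip() for line in f if line.strip()]

tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)

# Let the chat template insert the model's special tokens instead of hand-writing them
prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": p}], tokenize=False, add_generation_prompt=True
    )
    for p in user_prompts
]

llm = LLM(model=args.model_path, trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.5, top_p=0.8, max_tokens=1000)

for user_prompt, output in zip(user_prompts, llm.generate(prompts, sampling_params)):
    print("================")
    print(f"User: {user_prompt}")
    print(f"Assistant: {output.outputs[0].text}")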