zR d44ce6ce21 支持多模态Grounding
1. 美化了chat的代码
2. 支持视觉工具输出Grounding任务
3. 完善工具调用的流程
2024-03-06 13:33:48 +08:00

42 lines
1.5 KiB
Python

from transformers import LlamaTokenizer, AutoModelForCausalLM, AutoTokenizer
from configs import TOOL_CONFIG
import torch
class ModelContainer:
    """Hold the optional vision (VQA) and audio (AQA) models and tokenizers.

    Every attribute starts out as ``None``. During construction the vision
    pair is loaded when ``TOOL_CONFIG["vqa_processor"]["use"]`` is truthy and
    the audio pair when ``TOOL_CONFIG["aqa_processor"]["use"]`` is truthy.
    ``model`` and ``metadata`` are only initialised to ``None`` here.
    """

    def __init__(self):
        # Attributes this constructor never populates.
        self.model = None
        self.metadata = None

        # Tool models/tokenizers; they stay None unless enabled in TOOL_CONFIG.
        self.vision_model = None
        self.vision_tokenizer = None
        self.audio_tokenizer = None
        self.audio_model = None

        vqa_cfg = TOOL_CONFIG["vqa_processor"]
        if vqa_cfg["use"]:
            self.vision_tokenizer = LlamaTokenizer.from_pretrained(
                vqa_cfg["tokenizer_path"],
                trust_remote_code=True,
            )
            vision_model = AutoModelForCausalLM.from_pretrained(
                pretrained_model_name_or_path=vqa_cfg["model_path"],
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )
            # Move to the configured device, then switch to eval mode.
            vision_model = vision_model.to(vqa_cfg["device"])
            self.vision_model = vision_model.eval()

        # Looked up only after the vision branch, matching the original order.
        aqa_cfg = TOOL_CONFIG["aqa_processor"]
        if aqa_cfg["use"]:
            self.audio_tokenizer = AutoTokenizer.from_pretrained(
                aqa_cfg["tokenizer_path"],
                trust_remote_code=True,
            )
            audio_model = AutoModelForCausalLM.from_pretrained(
                pretrained_model_name_or_path=aqa_cfg["model_path"],
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )
            # Move to the configured device, then switch to eval mode.
            audio_model = audio_model.to(aqa_cfg["device"])
            self.audio_model = audio_model.eval()
# Module-level instance, created when this module is imported. Construction
# loads the vision/audio models if they are enabled in TOOL_CONFIG.
container = ModelContainer()