import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer

from configs import TOOL_CONFIG


class ModelContainer:
    """Holds the model/tokenizer pairs for whichever tools are enabled in TOOL_CONFIG."""

    def __init__(self):
        self.model = None
        self.metadata = None
        self.vision_model = None
        self.vision_tokenizer = None
        self.audio_model = None
        self.audio_tokenizer = None

        # Load the visual question answering (VQA) model only when enabled.
        if TOOL_CONFIG["vqa_processor"]["use"]:
            self.vision_tokenizer = LlamaTokenizer.from_pretrained(
                TOOL_CONFIG["vqa_processor"]["tokenizer_path"],
                trust_remote_code=True,
            )
            self.vision_model = (
                AutoModelForCausalLM.from_pretrained(
                    TOOL_CONFIG["vqa_processor"]["model_path"],
                    torch_dtype=torch.bfloat16,
                    low_cpu_mem_usage=True,
                    trust_remote_code=True,
                )
                .to(TOOL_CONFIG["vqa_processor"]["device"])
                .eval()  # inference only: disable dropout etc.
            )

        # Load the audio question answering (AQA) model only when enabled.
        if TOOL_CONFIG["aqa_processor"]["use"]:
            self.audio_tokenizer = AutoTokenizer.from_pretrained(
                TOOL_CONFIG["aqa_processor"]["tokenizer_path"],
                trust_remote_code=True,
            )
            self.audio_model = (
                AutoModelForCausalLM.from_pretrained(
                    TOOL_CONFIG["aqa_processor"]["model_path"],
                    torch_dtype=torch.bfloat16,
                    low_cpu_mem_usage=True,
                    trust_remote_code=True,
                )
                .to(TOOL_CONFIG["aqa_processor"]["device"])
                .eval()
            )


# Module-level singleton so every tool shares the same loaded models
# instead of reloading weights per request.
container = ModelContainer()
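
# A minimal sketch of the TOOL_CONFIG shape this module reads, assuming the
# two-tool layout above. The real values live in configs.py; every path and
# device below is a hypothetical placeholder, not the project's actual
# configuration:
#
# TOOL_CONFIG = {
#     "vqa_processor": {
#         "use": True,                                 # toggle loading the VQA model
#         "tokenizer_path": "path/to/vqa-tokenizer",   # hypothetical
#         "model_path": "path/to/vqa-model",           # hypothetical
#         "device": "cuda:0",                          # hypothetical
#     },
#     "aqa_processor": {
#         "use": False,                                # toggle loading the AQA model
#         "tokenizer_path": "path/to/aqa-tokenizer",   # hypothetical
#         "model_path": "path/to/aqa-model",           # hypothetical
#         "device": "cuda:0",                          # hypothetical
#     },
# }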