diff --git a/.gitignore b/.gitignore
index 6a2ccef5..05c4b8e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ logs
 /knowledge_base/samples/vector_store
 
 /configs/*.py
+/configs/loom.yaml
 .vscode/
 
 # below are standard python ignore files
diff --git a/configs/loom.yaml b/configs/loom.yaml
deleted file mode 100644
index 17ebe0f8..00000000
--- a/configs/loom.yaml
+++ /dev/null
@@ -1,333 +0,0 @@
-log_path: "logs"
-log_level: "DEBUG"
-
-api_server:
-  host: "127.0.0.1"
-  port: 8000
-
-publish_server:
-  host: "127.0.0.1"
-  port: 8001
-
-openai_plugins_folder:
-  - "openai_plugins"
-openai_plugins_load_folder:
-  - "configs"
-
-
-plugins:
-  - openai:
-      name: "openai"
-
-  - imitater:
-      name: "imitater"
-      logdir: "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/logs"
-      worker_name: "worker1"
-      run_openai_api:
-        host: "127.0.0.1"
-        port: 30000
-      imitate_model_workers:
-        - worker1:
-            model:
-                name: "Qwen-1_8B-Chat"
-                chat_model_path: "/media/checkpoint/Qwen-1_8B"
-                chat_model_device: "0"
-                chat_template_path: "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/openai_plugins/imitater/templates/qwen.jinja"
-                generation_config_path: "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/openai_plugins/imitater/generation_config/qwen"
-                agent_type: "react"
-
-            embedding:
-                name: "bge-large-zh"
-                embed_model_path: "/media/checkpoint/bge-large-zh"
-                embed_model_device: "0"
-                embed_batch_size: 16
-
-  - fastchat:
-      name: "fastchat"
-      logdir: "logs"
-      # LLM 运行设备。设为"auto"会自动检测，也可手动设定为"cuda","mps","cpu"其中之一。
-      llm_device: "auto"
-      model_names:
-        - "chatglm3-6b"
-      run_controller:
-        host: "127.0.0.1"
-        port: 20001
-        dispatch_method: "shortest_queue"
-      run_openai_api:
-        host: "127.0.0.1"
-        port: 20000
-      fschat_model_workers:
-        - default:
-            host: "127.0.0.1"
-            port: 20002
-            device: "auto"
-            infer_turbo: False
-
-            # model_worker多卡加载需要配置的参数
-            # "gpus": None, # 使用的GPU，以str的格式指定，如"0,1"，如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
-            # "num_gpus": 1, # 使用GPU的数量
-            # "max_gpu_memory": "20GiB", # 每个GPU占用的最大显存
-
-            # 以下为model_worker非常用参数，可根据需要配置
-            # "load_8bit": False, # 开启8bit量化
-            # "cpu_offloading": None,
-            # "gptq_ckpt": None,
-            # "gptq_wbits": 16,
-            # "gptq_groupsize": -1,
-            # "gptq_act_order": False,
-            # "awq_ckpt": None,
-            # "awq_wbits": 16,
-            # "awq_groupsize": -1,
-            # "model_names": LLM_MODELS,
-            # "conv_template": None,
-            # "limit_worker_concurrency": 5,
-            # "stream_interval": 2,
-            # "no_register": False,
-            # "embed_in_truncate": False,
-
-            # 以下为vllm_worker配置参数,注意使用vllm必须有gpu，仅在Linux测试通过
-
-            # tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
-            # 'tokenizer_mode':'auto',
-            # 'trust_remote_code':True,
-            # 'download_dir':None,
-            # 'load_format':'auto',
-            # 'dtype':'auto',
-            # 'seed':0,
-            # 'worker_use_ray':False,
-            # 'pipeline_parallel_size':1,
-            # 'tensor_parallel_size':1,
-            # 'block_size':16,
-            # 'swap_space':4 , # GiB
-            # 'gpu_memory_utilization':0.90,
-            # 'max_num_batched_tokens':2560,
-            # 'max_num_seqs':256,
-            # 'disable_log_stats':False,
-            # 'conv_template':None,
-            # 'limit_worker_concurrency':5,
-            # 'no_register':False,
-            # 'num_gpus': 1
-            # 'engine_use_ray': False,
-            # 'disable_log_requests': False
-
-        - chatglm3-6b:
-            host: "127.0.0.1"
-            device: "cuda"
-            port: 20009
-
-        - internlm2-chat-7b:
-            host: "127.0.0.1"
-            device: "cuda"
-            port: 20009
-
-          # 以下配置可以不用修改，在model_config中设置启动的模型
-        - zhipu-api:
-            port: 21001
-
-        - minimax-api:
-            port: 21002
-
-        - xinghuo-api:
-            port: 21003
-
-        - qianfan-api:
-            port: 21004
-
-        - fangzhou-api:
-            port: 21005
-
-        - qwen-api:
-            port: 21006
-
-        - baichuan-api:
-            port: 21007
-
-        - azure-api:
-            port: 21008
-
-        - tiangong-api:
-            port: 21009
-      online_llm_model:
-        # 线上模型。请在server_config中为每个在线API设置不同的端口
-
-        - "openai-api":
-            "model_name": "gpt-3.5-turbo"
-            "api_base_url": "https://api.openai.com/v1"
-            "api_key": ""
-            "openai_proxy": ""
-
-        # 具体注册及api key获取请前往 http://open.bigmodel.cn
-        - "zhipu-api":
-            "api_key": ""
-            "version": "chatglm_turbo"  # 可选包括 "chatglm_turbo"
-            "provider": "ChatGLMWorker"
-
-        # 具体注册及api key获取请前往 https://api.minimax.chat/
-        - "minimax-api":
-            "group_id": ""
-            "api_key": ""
-            "is_pro": False
-            "provider": "MiniMaxWorker"
-
-        # 具体注册及api key获取请前往 https://xinghuo.xfyun.cn/
-        - "xinghuo-api":
-            "APPID": ""
-            "APISecret": ""
-            "api_key": ""
-            "version": "v1.5"  # 你使用的讯飞星火大模型版本，可选包括 "v3.0", "v1.5", "v2.0"
-            "provider": "XingHuoWorker"
-
-        # 百度千帆 API，申请方式请参考 https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf
-        - "qianfan-api":
-            "version": "ERNIE-Bot"  # 注意大小写。当前支持 "ERNIE-Bot" 或 "ERNIE-Bot-turbo"， 更多的见官方文档。
-            "version_url": ""  # 也可以不填写version，直接填写在千帆申请模型发布的API地址
-            "api_key": ""
-            "secret_key": ""
-            "provider": "QianFanWorker"
-
-        # 火山方舟 API，文档参考 https://www.volcengine.com/docs/82379
-        - "fangzhou-api":
-            "version": "chatglm-6b-model"  # 当前支持 "chatglm-6b-model"， 更多的见文档模型支持列表中方舟部分。
-            "version_url": ""  # 可以不填写version，直接填写在方舟申请模型发布的API地址
-            "api_key": ""
-            "secret_key": ""
-            "provider": "FangZhouWorker"
-
-        # 阿里云通义千问 API，文档参考 https://help.aliyun.com/zh/dashscope/developer-reference/api-details
-        - "qwen-api":
-            "version": "qwen-turbo"  # 可选包括 "qwen-turbo", "qwen-plus"
-            "api_key": ""  # 请在阿里云控制台模型服务灵积API-KEY管理页面创建
-            "provider": "QwenWorker"
-
-        # 百川 API，申请方式请参考 https://www.baichuan-ai.com/home#api-enter
-        - "baichuan-api":
-            "version": "Baichuan2-53B"  # 当前支持 "Baichuan2-53B"， 见官方文档。
-            "api_key": ""
-            "secret_key": ""
-            "provider": "BaiChuanWorker"
-
-        # Azure API
-        - "azure-api":
-            "deployment_name": ""  # 部署容器的名字
-            "resource_name": ""  # https://{resource_name}.openai.azure.com/openai/ 填写resource_name的部分，其他部分不要填写
-            "api_version": ""  # API的版本，不是模型版本
-            "api_key": ""
-            "provider": "AzureWorker"
-
-        # 昆仑万维天工 API https://model-platform.tiangong.cn/
-        - "tiangong-api":
-            "version": "SkyChat-MegaVerse"
-            "api_key": ""
-            "secret_key": ""
-            "provider": "TianGongWorker"
-      "llm_model":
-
-        "chatglm2-6b": "THUDM/chatglm2-6b"
-        "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
-        "chatglm3-6b": "/share/home/zyx/Models/chatglm3-6b"
-        "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
-
-        "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
-        "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
-        "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
-
-        "Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat"
-        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
-        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
-        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
-
-        "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
-        "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
-        "baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat"
-        "baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat"
-
-        "internlm-7b": "internlm/internlm-7b"
-        "internlm-chat-7b": "internlm/internlm-chat-7b"
-        "internlm2-chat-7b": "internlm/internlm2-chat-7b"
-        "internlm2-chat-20b": "internlm/internlm2-chat-20b"
-
-        "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
-        "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
-
-        "Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat"
-
-        "agentlm-7b": "THUDM/agentlm-7b"
-        "agentlm-13b": "THUDM/agentlm-13b"
-        "agentlm-70b": "THUDM/agentlm-70b"
-
-        "falcon-7b": "tiiuae/falcon-7b"
-        "falcon-40b": "tiiuae/falcon-40b"
-        "falcon-rw-7b": "tiiuae/falcon-rw-7b"
-
-        "aquila-7b": "BAAI/Aquila-7B"
-        "aquilachat-7b": "BAAI/AquilaChat-7B"
-        "open_llama_13b": "openlm-research/open_llama_13b"
-        "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5"
-        "koala": "young-geng/koala"
-        "mpt-7b": "mosaicml/mpt-7b"
-        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
-        "mpt-30b": "mosaicml/mpt-30b"
-        "opt-66b": "facebook/opt-66b"
-        "opt-iml-max-30b": "facebook/opt-iml-max-30b"
-        "gpt2": "gpt2"
-        "gpt2-xl": "gpt2-xl"
-        "gpt-j-6b": "EleutherAI/gpt-j-6b"
-        "gpt4all-j": "nomic-ai/gpt4all-j"
-        "gpt-neox-20b": "EleutherAI/gpt-neox-20b"
-        "pythia-12b": "EleutherAI/pythia-12b"
-        "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
-        "dolly-v2-12b": "databricks/dolly-v2-12b"
-        "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
-
-      vllm_model_dict:
-        "chatglm2-6b": "THUDM/chatglm2-6b"
-        "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
-        "chatglm3-6b": "THUDM/chatglm3-6b"
-        "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
-
-        "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
-        "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
-        "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
-
-        "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat"
-        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
-        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
-        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
-
-        "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
-        "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
-        "baichuan2-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
-        "baichuan2-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
-
-        "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
-        "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
-
-        "internlm-7b": "internlm/internlm-7b"
-        "internlm-chat-7b": "internlm/internlm-chat-7b"
-        "internlm2-chat-7b": "internlm/Models/internlm2-chat-7b"
-        "internlm2-chat-20b": "internlm/Models/internlm2-chat-20b"
-
-        "aquila-7b": "BAAI/Aquila-7B"
-        "aquilachat-7b": "BAAI/AquilaChat-7B"
-
-        "falcon-7b": "tiiuae/falcon-7b"
-        "falcon-40b": "tiiuae/falcon-40b"
-        "falcon-rw-7b": "tiiuae/falcon-rw-7b"
-        "gpt2": "gpt2"
-        "gpt2-xl": "gpt2-xl"
-        "gpt-j-6b": "EleutherAI/gpt-j-6b"
-        "gpt4all-j": "nomic-ai/gpt4all-j"
-        "gpt-neox-20b": "EleutherAI/gpt-neox-20b"
-        "pythia-12b": "EleutherAI/pythia-12b"
-        "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
-        "dolly-v2-12b": "databricks/dolly-v2-12b"
-        "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
-        "open_llama_13b": "openlm-research/open_llama_13b"
-        "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3"
-        "koala": "young-geng/koala"
-        "mpt-7b": "mosaicml/mpt-7b"
-        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
-        "mpt-30b": "mosaicml/mpt-30b"
-        "opt-66b": "facebook/opt-66b"
-        "opt-iml-max-30b": "facebook/opt-iml-max-30b"
-