Restructure the repository root: move kb/logs/media/nltk_data into a dedicated data directory (configurable, default data). Note that existing knowledge-base files must be moved accordingly.

This commit is contained in:
liunux4odoo 2024-03-07 09:17:11 +08:00
parent e06d56b8bc
commit 1118922387
116 changed files with 6 additions and 933 deletions

7
.gitignore vendored
View File

@@ -3,9 +3,10 @@
 *.bak
 logs
 /media/
-/knowledge_base/*
-!/knowledge_base/samples
-/knowledge_base/samples/vector_store
+/data/*
+!/data/knowledge_base/samples
+/data/knowledge_base/samples/vector_store
+!/data/nltk_data
 /configs/*.py
 /configs/loom.yaml
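
Per the commit message, the data directory is configurable and defaults to data. A minimal sketch of how such a configurable data root could be resolved (the CHATCHAT_DATA environment variable and constant names below are illustrative assumptions, not names taken from this commit):

    # Sketch: resolving a configurable data root (names here are illustrative assumptions).
    import os
    from pathlib import Path

    # Default to ./data; allow an override via an environment variable (hypothetical name).
    DATA_PATH = Path(os.environ.get("CHATCHAT_DATA", "data")).resolve()

    # Subdirectories formerly at the repository root now live under the data root.
    KB_ROOT_PATH = DATA_PATH / "knowledge_base"
    LOG_PATH = DATA_PATH / "logs"
    MEDIA_PATH = DATA_PATH / "media"
    NLTK_DATA_PATH = DATA_PATH / "nltk_data"

    for p in (KB_ROOT_PATH, LOG_PATH, MEDIA_PATH, NLTK_DATA_PATH):
        p.mkdir(parents=True, exist_ok=True)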

2
.gitmodules vendored
View File

@@ -1,3 +1,3 @@
 [submodule "knowledge_base/samples/content/wiki"]
-	path = knowledge_base/samples/content/wiki
+	path = data/knowledge_base/samples/content/wiki
 	url = https://github.com/chatchat-space/Langchain-Chatchat.wiki.git

View File

@@ -1,94 +0,0 @@
import multiprocessing as mp
from multiprocessing import Process
from typing import List

from loom_core.constants import LOOM_LOG_BACKUP_COUNT, LOOM_LOG_MAX_BYTES
from loom_core.openai_plugins.core.adapter import ProcessesInfo
from loom_core.openai_plugins.core.application import ApplicationAdapter

import os
import sys
import logging
import logging.config

from loom_core.openai_plugins.deploy.utils import get_timestamp_ms, get_config_dict, get_log_file
from omegaconf import OmegaConf

logger = logging.getLogger(__name__)

# Add the current directory to sys.path so the plugin's own modules can be imported
root_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(root_dir)

import imitater_process_dict
from imitater_config import ImitaterCfg
from imitater_wrapper import run_imitater


class ImitaterApplicationAdapter(ApplicationAdapter):
    model_worker_started: mp.Event = None

    def __init__(self, cfg=None, state_dict: dict = None):
        self.processesInfo = None
        self._cfg = ImitaterCfg(cfg=cfg)
        super().__init__(state_dict=state_dict)

    def class_name(self) -> str:
        """Get class name."""
        return self.__class__.__name__

    @classmethod
    def from_config(cls, cfg=None):
        _state_dict = {
            "application_name": "Imitate",
            "application_version": "0.0.1",
            "application_description": "Imitate application",
            "application_author": "Imitate"
        }
        state_dict = cfg.get("state_dict", {})
        if state_dict is not None and _state_dict is not None:
            _state_dict = {**state_dict, **_state_dict}
        else:
            # Handle the case where one or both are None
            _state_dict = state_dict or _state_dict or {}

        return cls(cfg=cfg, state_dict=_state_dict)

    def init_processes(self, processesInfo: ProcessesInfo):
        self.processesInfo = processesInfo

        logging_conf = get_config_dict(
            processesInfo.log_level,
            get_log_file(log_path=self._cfg.get_cfg().get("logdir"), sub_dir=f"local_{get_timestamp_ms()}"),
            LOOM_LOG_BACKUP_COUNT,
            LOOM_LOG_MAX_BYTES,
        )
        logging.config.dictConfig(logging_conf)  # type: ignore

        worker_name = self._cfg.get_cfg().get("worker_name", [])
        imitater_process_dict.mp_manager = mp.Manager()
        # Use "spawn" to prevent CUDA re-init errors in the child process.
        mp.set_start_method(method="spawn", force=True)
        self.model_worker_started = imitater_process_dict.mp_manager.Event()
        process = Process(
            target=run_imitater,
            name=f"model_worker - {worker_name}",
            kwargs=dict(cfg=self._cfg,
                        worker_name=worker_name,
                        started_event=self.model_worker_started,
                        logging_conf=logging_conf),
            daemon=True,
        )
        imitater_process_dict.processes[worker_name] = process

    def start(self):
        for n, p in imitater_process_dict.processes.items():
            p.start()
            p.name = f"{p.name} ({p.pid})"
        # Wait for the model worker to finish starting
        # self.model_worker_started.wait()

    def stop(self):
        imitater_process_dict.stop()
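
One subtlety in from_config above: the merge {**state_dict, **_state_dict} puts the adapter's built-in _state_dict last, so its keys win over any caller-supplied values. A quick illustration:

    # Later keys win in a dict merge, so the adapter's defaults override caller values.
    state_dict = {"application_name": "Custom", "extra_key": 1}
    _state_dict = {"application_name": "Imitate", "application_version": "0.0.1"}
    merged = {**state_dict, **_state_dict}
    print(merged)
    # {'application_name': 'Imitate', 'extra_key': 1, 'application_version': '0.0.1'}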

View File

@@ -1,80 +0,0 @@
import time
from multiprocessing import Process

from loom_core.openai_plugins.core.control import ControlAdapter

import os
import sys
import logging

logger = logging.getLogger(__name__)

# Add the current directory to sys.path so the plugin's own modules can be imported
root_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(root_dir)

import imitater_process_dict
from imitater_wrapper import run_imitater
from imitater_config import ImitaterCfg


class ImitaterControlAdapter(ControlAdapter):

    def __init__(self, cfg=None, state_dict: dict = None):
        self._cfg = ImitaterCfg(cfg=cfg)
        super().__init__(state_dict=state_dict)

    def class_name(self) -> str:
        """Get class name."""
        return self.__class__.__name__

    def start_model(self, new_model_name):
        imitater_process_dict.stop()
        logger.info(f"Preparing to start new process: {new_model_name}")
        e = imitater_process_dict.mp_manager.Event()
        process = Process(
            target=run_imitater,
            name=f"model_worker - {new_model_name}",
            kwargs=dict(cfg=self._cfg,
                        worker_name=new_model_name,
                        started_event=e),
            daemon=True,
        )
        process.start()
        process.name = f"{process.name} ({process.pid})"
        imitater_process_dict.processes[new_model_name] = process
        # e.wait()
        logger.info(f"New process started successfully: {new_model_name}")

    def stop_model(self, model_name: str):
        if model_name in imitater_process_dict.processes:
            process = imitater_process_dict.processes.pop(model_name)
            time.sleep(1)
            process.kill()
            logger.info(f"Stopped process: {model_name}")
        else:
            logger.error(f"Process not found: {model_name}")
            raise Exception(f"Process not found: {model_name}")

    def replace_model(self, model_name: str, new_model_name: str):
        pass

    @classmethod
    def from_config(cls, cfg=None):
        _state_dict = {
            "controller_name": "Imitate",
            "controller_version": "0.0.1",
            "controller_description": "Imitate controller",
            "controller_author": "Imitate"
        }
        state_dict = cfg.get("state_dict", {})
        if state_dict is not None and _state_dict is not None:
            _state_dict = {**state_dict, **_state_dict}
        else:
            # Handle the case where one or both are None
            _state_dict = state_dict or _state_dict or {}

        return cls(cfg=cfg, state_dict=_state_dict)

View File

@@ -1,8 +0,0 @@
{
  "bos_token_id": 1,
  "eos_token_id": [2, 92542],
  "pad_token_id": 2,
  "max_new_tokens": 1024,
  "do_sample": true,
  "transformers_version": "4.33.2"
}

View File

@@ -1,9 +0,0 @@
{
  "eos_token_id": [151643, 151645],
  "pad_token_id": 151643,
  "max_new_tokens": 1024,
  "do_sample": true,
  "top_k": 0,
  "top_p": 0.8,
  "transformers_version": "4.34.0"
}

View File

@@ -1,80 +0,0 @@
# Default timeout for httpx requests (seconds). If model loading or chat responses
# are slow and you hit timeout errors, increase this value as needed.
from typing import List, TypedDict

HTTPX_DEFAULT_TIMEOUT = 300.0
log_verbose = True


class ImitaterModel(TypedDict):
    name: str
    chat_model_path: str
    chat_model_device: str
    chat_template_path: str
    generation_config_path: str
    agent_type: str


class ImitaterEmbedding(TypedDict):
    name: str
    embed_model_path: str
    embed_model_device: str
    embed_batch_size: int


class ImitaterWorker(TypedDict):
    name: str
    model: ImitaterModel
    embedding: ImitaterEmbedding


class ImitaterCfg:
    def __init__(self, cfg: dict = None):
        if cfg is None:
            raise RuntimeError("ImitaterCfg cfg is None.")
        self._cfg = cfg

    def get_cfg(self):
        return self._cfg

    def get_run_openai_api_cfg(self):
        return self._cfg.get("run_openai_api", {})

    def get_imitate_model_workers_by_name(self, name: str) -> ImitaterWorker:
        imitate_model_workers_cfg = self._cfg.get("imitate_model_workers", None)
        if imitate_model_workers_cfg is None:
            raise RuntimeError("imitate_model_workers_cfg is None.")
        get = lambda model_name: imitate_model_workers_cfg[
            self.get_imitate_model_workers_index_by_name(model_name)
        ].get(model_name, {})
        imitate = get(name)
        # Build an ImitaterWorker from the raw config entry
        worker_cfg = ImitaterWorker(name=name,
                                    model=ImitaterModel(**imitate.get("model", {})),
                                    embedding=ImitaterEmbedding(**imitate.get("embedding", {}))
                                    )
        return worker_cfg

    def get_imitate_model_workers_names(self) -> List[str]:
        imitate_model_workers_cfg = self._cfg.get("imitate_model_workers", None)
        if imitate_model_workers_cfg is None:
            raise RuntimeError("imitate_model_workers_cfg is None.")
        worker_name_cfg = []
        for cfg in imitate_model_workers_cfg:
            for key, imitate_model_workers in cfg.items():
                worker_name_cfg.append(key)
        return worker_name_cfg

    def get_imitate_model_workers_index_by_name(self, name) -> int:
        imitate_model_workers_cfg = self._cfg.get("imitate_model_workers", None)
        if imitate_model_workers_cfg is None:
            raise RuntimeError("imitate_model_workers_cfg is None.")
        for cfg in imitate_model_workers_cfg:
            for key, imitate_model_workers in cfg.items():
                if key == name:
                    return imitate_model_workers_cfg.index(cfg)
        return -1
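
The lookup methods above imply that imitate_model_workers is a list of single-key dicts, each keyed by worker name. A usage sketch under that assumption (all paths and names below are placeholders, not values from this repository):

    # Sketch of the config shape ImitaterCfg expects; paths/names are placeholders.
    cfg = ImitaterCfg(cfg={
        "run_openai_api": {"port": 30000},
        "imitate_model_workers": [
            {"qwen-worker": {
                "model": {
                    "name": "qwen-worker",
                    "chat_model_path": "/models/qwen",
                    "chat_model_device": "cuda:0",
                    "chat_template_path": "/models/qwen/template.jinja",
                    "generation_config_path": "/models/qwen/generation_config.json",
                    "agent_type": "qwen",
                },
                "embedding": {
                    "name": "qwen-embed",
                    "embed_model_path": "/models/bge",
                    "embed_model_device": "cuda:0",
                    "embed_batch_size": 32,
                },
            }},
        ],
    })
    print(cfg.get_imitate_model_workers_names())       # ['qwen-worker']
    worker = cfg.get_imitate_model_workers_by_name("qwen-worker")
    print(worker["model"]["chat_model_path"])          # '/models/qwen'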

View File

@@ -1,22 +0,0 @@
from multiprocessing import Process
from typing import Dict
import logging

logger = logging.getLogger(__name__)

mp_manager = None
processes: Dict[str, Process] = {}


def stop():
    for n, process in processes.items():
        logger.warning("Sending SIGKILL to %s", process)
        try:
            process.kill()
        except Exception:
            logger.info("Failed to kill process %s", process, exc_info=True)

    for n, p in processes.items():
        logger.info("Process status: %s", p)

    processes.clear()
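
A minimal sketch of driving this registry directly (the _worker target is a stand-in for a real model worker, for illustration only):

    # Illustrative only: register a dummy worker, then tear everything down.
    import time
    import multiprocessing as mp

    import imitater_process_dict

    def _worker():
        while True:
            time.sleep(1)

    if __name__ == "__main__":
        mp.set_start_method("spawn", force=True)
        p = mp.Process(target=_worker, name="model_worker - demo", daemon=True)
        p.start()
        imitater_process_dict.processes["demo"] = p
        imitater_process_dict.stop()  # SIGKILLs and clears every registered process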

View File

@@ -1,60 +0,0 @@
import multiprocessing
from typing import List, Optional, Dict
from fastapi import FastAPI

import sys
import multiprocessing as mp
import uvicorn
import os
import logging
import logging.config
import asyncio
import signal
import inspect

logger = logging.getLogger(__name__)

# Add the current directory to sys.path so the plugin's own modules can be imported
root_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(root_dir)

from imitater_config import ImitaterCfg

"""
Keep the worker entry points at module level to avoid "Can't pickle function" errors.
"""


def _start_imitater(
        started_event: mp.Event = None
):
    from imitater.service.app import launch_app

    # Ignore keyboard interrupts; the parent process handles shutdown.
    signal.signal(signal.SIGINT, lambda *_: None)
    launch_app()


def run_imitater(
        cfg: ImitaterCfg,
        worker_name: str,
        started_event: mp.Event = None,
        logging_conf: Optional[dict] = None):
    # Ignore keyboard interrupts; the parent process handles shutdown.
    signal.signal(signal.SIGINT, lambda *_: None)
    logging.config.dictConfig(logging_conf)  # type: ignore

    worker_cfg = cfg.get_imitate_model_workers_by_name(worker_name)
    os.environ["AGENT_TYPE"] = worker_cfg.get("model").get("agent_type")
    os.environ["CHAT_MODEL_PATH"] = worker_cfg.get("model").get("chat_model_path")
    os.environ["CHAT_MODEL_DEVICE"] = worker_cfg.get("model").get("chat_model_device")
    os.environ["CHAT_TEMPLATE_PATH"] = worker_cfg.get("model").get("chat_template_path")
    os.environ["GENERATION_CONFIG_PATH"] = worker_cfg.get("model").get("generation_config_path")
    os.environ["EMBED_MODEL_PATH"] = worker_cfg.get("embedding").get("embed_model_path")
    os.environ["EMBED_MODEL_DEVICE"] = worker_cfg.get("embedding").get("embed_model_device")
    os.environ["EMBED_BATCH_SIZE"] = str(worker_cfg.get("embedding").get("embed_batch_size"))
    os.environ["SERVICE_PORT"] = str(cfg.get_run_openai_api_cfg().get("port", 30000))

    _start_imitater(started_event=started_event)
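
run_imitater hands the worker configuration to the imitater service purely through environment variables. A hedged sketch of what the receiving side might do with them (this is not imitater's actual code):

    # Illustrative receiving end: read back the configuration handed off via os.environ.
    import os

    chat_model_path = os.environ["CHAT_MODEL_PATH"]
    embed_batch_size = int(os.environ.get("EMBED_BATCH_SIZE", "16"))
    service_port = int(os.environ.get("SERVICE_PORT", "30000"))
    print(f"serving {chat_model_path} on port {service_port} (embed batch size {embed_batch_size})")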

Some files were not shown because too many files have changed in this diff.