mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-01-19 21:37:20 +08:00
import asyncio
import multiprocessing as mp
import os
import subprocess
import sys
from multiprocessing import Process

# Cap numexpr's thread count; defaults to the number of CPU cores
try:
    import numexpr

    n_cores = numexpr.utils.detect_number_of_cores()
    os.environ["NUMEXPR_MAX_THREADS"] = str(n_cores)
except ImportError:
    pass

sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from configs import (
    LOG_PATH,
    log_verbose,
    logger,
    LLM_MODEL_CONFIG,
    EMBEDDING_MODEL,
    TEXT_SPLITTER_NAME,
    API_SERVER,
    WEBUI_SERVER,
    HTTPX_DEFAULT_TIMEOUT,
    VERSION,
)
from server.utils import FastAPI, embedding_device
from server.knowledge_base.migrate import create_tables
import argparse
from typing import Tuple

# Collect the unique model names across every category in LLM_MODEL_CONFIG.
all_model_names = set()
for model_category in LLM_MODEL_CONFIG.values():
    all_model_names.update(model_category.keys())

all_model_names_list = list(all_model_names)

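# LLM_MODEL_CONFIG is assumed to be a two-level mapping of the form
# (hypothetical category and model names):
#
#     LLM_MODEL_CONFIG = {
#         "llm_model": {"chatglm3-6b": {...}},
#         "action_model": {"gpt-3.5-turbo": {...}},
#     }
#
# so the loop above flattens the inner keys into one de-duplicated name list.
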
def _set_app_event(app: FastAPI, started_event: mp.Event = None):
    """Register a startup hook that signals the parent process once the app is ready."""

    @app.on_event("startup")
    async def on_startup():
        if started_event is not None:
            started_event.set()


def run_api_server(started_event: mp.Event = None, run_mode: str = None):
    """Create the FastAPI app and serve it with uvicorn on API_SERVER's host and port."""
    from server.api import create_app
    import uvicorn
    from server.utils import set_httpx_config
    set_httpx_config()

    app = create_app(run_mode=run_mode)
    _set_app_event(app, started_event)

    host = API_SERVER["host"]
    port = API_SERVER["port"]

    uvicorn.run(app, host=host, port=port)


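# Note: uvicorn.run() blocks until the server exits, which is why run_api_server
# is executed in its own daemon Process; readiness is reported back through the
# FastAPI "startup" hook installed by _set_app_event.
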
def run_webui(started_event: mp.Event = None, run_mode: str = None):
    """Launch the Streamlit WebUI as a subprocess and block until it exits."""
    from server.utils import set_httpx_config
    set_httpx_config()

    host = WEBUI_SERVER["host"]
    port = WEBUI_SERVER["port"]

    cmd = ["streamlit", "run", "webui.py",
           "--server.address", host,
           "--server.port", str(port),
           "--theme.base", "light",
           "--theme.primaryColor", "#165dff",
           "--theme.secondaryBackgroundColor", "#f5f5f5",
           "--theme.textColor", "#000000",
           ]
    if run_mode == "lite":
        cmd += [
            "--",
            "lite",
        ]
    p = subprocess.Popen(cmd)
    started_event.set()
    p.wait()


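# In the streamlit command above, everything after the bare "--" separator is
# passed to webui.py itself rather than parsed by streamlit, so in lite mode
# webui.py receives "lite" as its first script argument.
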
def run_loom(started_event: mp.Event = None):
    """Launch the loom_core OpenAI-plugins deployment as a subprocess and wait on it."""
    from configs import LOOM_CONFIG

    cmd = ["python", "-m", "loom_core.openai_plugins.deploy.local",
           "-f", LOOM_CONFIG,
           ]

    p = subprocess.Popen(cmd)
    started_event.set()
    p.wait()


def parse_args() -> Tuple[argparse.Namespace, argparse.ArgumentParser]:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-a",
        "--all-webui",
        action="store_true",
        help="run fastchat's controller/openai_api/model_worker servers, run api.py and webui.py",
        dest="all_webui",
    )
    parser.add_argument(
        "--all-api",
        action="store_true",
        help="run fastchat's controller/openai_api/model_worker servers, run api.py",
        dest="all_api",
    )
    # Restored: start_main_server() reads args.llm_api, so the flag must be defined here.
    parser.add_argument(
        "--llm-api",
        action="store_true",
        help="run fastchat's controller/openai_api/model_worker servers",
        dest="llm_api",
    )

    parser.add_argument(
        "--api",
        action="store_true",
        help="run api.py server",
        dest="api",
    )

    parser.add_argument(
        "-w",
        "--webui",
        action="store_true",
        help="run webui.py server",
        dest="webui",
    )
    parser.add_argument(
        "-i",
        "--lite",
        action="store_true",
        help="run in lite mode: only online-API LLM chat and search-engine chat are supported",
        dest="lite",
    )
    args = parser.parse_args()
    return args, parser


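# Typical invocations (assuming this script is the project's startup.py):
#     python startup.py -a            # API server + WebUI
#     python startup.py --all-api     # API server only, no WebUI
#     python startup.py -a --lite     # lite mode: online-API LLM chat and search-engine chat only
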
def dump_server_info(after_start=False, args=None):
    """Print environment, version, and server address information to the console."""
    import platform
    import langchain
    import fastchat
    from server.utils import api_address, webui_address

    print("\n")
    print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
    print(f"Operating system: {platform.platform()}.")
    print(f"Python version: {sys.version}")
    print(f"Project version: {VERSION}")
    print(f"langchain version: {langchain.__version__}. fastchat version: {fastchat.__version__}")
    print("\n")

    print(f"Current text splitter: {TEXT_SPLITTER_NAME}")

    print(f"Current embeddings model: {EMBEDDING_MODEL} @ {embedding_device()}")

    if after_start:
        print("\n")
        print("Server runtime information:")
        if args.webui:
            print(f"    Chatchat WEBUI Server: {webui_address()}")
    print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
    print("\n")


async def start_main_server():
    import signal

    def handler(signalname):
        """
        Python 3.9 has `signal.strsignal(signalnum)` so this closure would not be needed.
        Also, 3.8 includes `signal.valid_signals()` that can be used to create a mapping for the same purpose.
        """

        def f(signal_received, frame):
            raise KeyboardInterrupt(f"{signalname} received")

        return f

    # This will be inherited by the child process if it is forked (not spawned)
    signal.signal(signal.SIGINT, handler("SIGINT"))
    signal.signal(signal.SIGTERM, handler("SIGTERM"))

    mp.set_start_method("spawn")
    manager = mp.Manager()
    run_mode = None

    args, parser = parse_args()

    if args.all_webui:
        args.openai_api = True
        args.model_worker = True
        args.api = True
        args.api_worker = True
        args.webui = True

    elif args.all_api:
        args.openai_api = True
        args.model_worker = True
        args.api = True
        args.api_worker = True
        args.webui = False

    elif args.llm_api:
        args.openai_api = True
        args.model_worker = True
        args.api_worker = True
        args.api = False
        args.webui = False

    if args.lite:
        args.model_worker = False
        run_mode = "lite"

    dump_server_info(args=args)

    if len(sys.argv) > 1:
        logger.info("Starting services:")
        logger.info(f"To view llm_api logs, go to {LOG_PATH}")

    processes = {"online_api": {}, "model_worker": {}}

    def process_count():
        # The "online_api" and "model_worker" entries are placeholder dicts,
        # so subtract them when counting the actual processes.
        return len(processes) + len(processes["online_api"]) + len(processes["model_worker"]) - 2

    loom_started = manager.Event()
    process = Process(
        target=run_loom,
        name="run_loom Server",
        kwargs=dict(started_event=loom_started),
        daemon=True,
    )
    processes["run_loom"] = process

    api_started = manager.Event()
    if args.api:
        process = Process(
            target=run_api_server,
            name="API Server",
            kwargs=dict(started_event=api_started, run_mode=run_mode),
            daemon=True,
        )
        processes["api"] = process

    webui_started = manager.Event()
    if args.webui:
        process = Process(
            target=run_webui,
            name="WEBUI Server",
            kwargs=dict(started_event=webui_started, run_mode=run_mode),
            daemon=True,
        )
        processes["webui"] = process

    if process_count() == 0:
        parser.print_help()
    else:
        try:
            # Ensure the tasks exit cleanly after receiving SIGINT
            if p := processes.get("run_loom"):
                p.start()
                p.name = f"{p.name} ({p.pid})"
                loom_started.wait()  # wait for loom to finish starting

            if p := processes.get("api"):
                p.start()
                p.name = f"{p.name} ({p.pid})"
                api_started.wait()  # wait for api.py to finish starting

            if p := processes.get("webui"):
                p.start()
                p.name = f"{p.name} ({p.pid})"
                webui_started.wait()  # wait for webui.py to finish starting

            dump_server_info(after_start=True, args=args)

            # Wait for the processes to exit (only the webui process is joined directly)
            if p := processes.get("webui"):
                p.join()
        # KeyboardInterrupt derives from BaseException, not Exception, so it must
        # be named explicitly to catch the raise from the signal handlers above.
        except (Exception, KeyboardInterrupt) as e:
            logger.error(e)
            logger.warning("Caught KeyboardInterrupt! Setting stop event...")
        finally:
            for p in processes.values():
                logger.warning("Sending SIGKILL to %s", p)
                # Queues and other inter-process communication primitives can break
                # when a process is killed, but we don't care here
                if isinstance(p, dict):
                    for process in p.values():
                        process.kill()
                else:
                    p.kill()

            for p in processes.values():
                logger.info("Process status: %s", p)


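# Startup sequencing above: loom -> api -> webui. Each child signals readiness
# through a multiprocessing.Manager Event before the next service is launched,
# so the WebUI is never started before the API server it talks to.
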
if __name__ == "__main__":
    create_tables()

    if sys.version_info < (3, 10):
        loop = asyncio.get_event_loop()
    else:
        # From Python 3.10 on, asyncio.get_event_loop() is deprecated when no
        # loop is running, so create one explicitly instead.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()

    asyncio.set_event_loop(loop)
    loop.run_until_complete(start_main_server())