mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-01-19 13:23:16 +08:00
Support starting multiple models at once via configuration, and add the Wiki to the samples knowledge base (#2002)
New features:
- Replace the LLM_MODEL config option with an LLM_MODELS list so that several models can be started at the same time
- Add the wiki to the samples knowledge base

Dependency changes:
- Pin streamlit~=1.27.0: 1.26.0 raises a rerun error and 1.28.0 refreshes endlessly

Fixes and optimizations:
- Improve the get_default_llm_model logic
- Adapt to the 25-text-per-request limit when using the Qwen online API for embeddings
- Skip files whose names start with "." when listing knowledge base files on disk
This commit is contained in:
parent: ce1001a043
commit: b51ba11f45
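For orientation before the file-by-file diff, here is a minimal before/after sketch of the renamed option in configs/model_config.py (the concrete model names are the ones appearing in the diff below):

# Before this commit: a single default model
LLM_MODEL = "chatglm2-6b"

# After this commit: a list; every entry is started at launch and the
# first one becomes the default model for the API and the WebUI
LLM_MODELS = ["chatglm2-6b-int4", "zhipu-api", "openai-api"]

default_model = LLM_MODELS[0]  # call sites that used LLM_MODEL now read LLM_MODELS[0]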
.gitignore (vendored, 5 lines changed)
@@ -2,7 +2,10 @@
 *.log.*
 *.bak
 logs
-/knowledge_base/
+/knowledge_base/*
+!/knowledge_base/samples
+/knowledge_base/samples/vector_store

 /configs/*.py
 .vscode/
+
.gitmodules (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
+[submodule "knowledge_base/samples/content/wiki"]
+	path = knowledge_base/samples/content/wiki
+	url = https://github.com/chatchat-space/Langchain-Chatchat.wiki.git
@@ -1,12 +1,12 @@
 from server.utils import get_ChatOpenAI
-from configs.model_config import LLM_MODEL, TEMPERATURE
+from configs.model_config import LLM_MODELS, TEMPERATURE
 from langchain.chains import LLMChain
 from langchain.prompts.chat import (
     ChatPromptTemplate,
     HumanMessagePromptTemplate,
 )

-model = get_ChatOpenAI(model_name=LLM_MODEL, temperature=TEMPERATURE)
+model = get_ChatOpenAI(model_name=LLM_MODELS[0], temperature=TEMPERATURE)


 human_prompt = "{input}"

@@ -2,6 +2,7 @@ import logging
 import os
 import langchain

+
 # 是否显示详细日志
 log_verbose = False
 langchain.verbose = False

@@ -56,7 +56,10 @@ KB_INFO = {
     "知识库名称": "知识库介绍",
     "samples": "关于本项目issue的解答",
 }


 # 通常情况下不需要更改以下内容

 # 知识库默认存储路径
 KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
 if not os.path.exists(KB_ROOT_PATH):
@@ -1,96 +1,13 @@
 import os


 # 可以指定一个绝对路径,统一存放所有的Embedding和LLM模型。
-# 每个模型可以是一个单独的目录,也可以是某个目录下的二级子目录
+# 每个模型可以是一个单独的目录,也可以是某个目录下的二级子目录。
+# 如果模型目录名称和 MODEL_PATH 中的 key 或 value 相同,程序会自动检测加载,无需修改 MODEL_PATH 中的路径。
 MODEL_ROOT_PATH = ""

[This hunk removes the full MODEL_PATH dictionary (the comment block plus the "embed_model" and "llm_model" tables) from the top of the file; the same entries reappear after ONLINE_LLM_MODEL in the @@ -197,6 +115,93 @@ hunk below.]

 # 选用的 Embedding 名称
-EMBEDDING_MODEL = "m3e-base" # 可以尝试最新的嵌入式sota模型:bge-large-zh-v1.5
+EMBEDDING_MODEL = "m3e-base" # bge-large-zh

 # Embedding 模型运行设备。设为"auto"会自动检测,也可手动设定为"cuda","mps","cpu"其中之一。
 EMBEDDING_DEVICE = "auto"
@@ -99,9 +16,11 @@ EMBEDDING_DEVICE = "auto"
 EMBEDDING_KEYWORD_FILE = "keywords.txt"
 EMBEDDING_MODEL_OUTPUT_PATH = "output"

-# LLM 名称
-LLM_MODEL = "chatglm2-6b"
-# AgentLM模型的名称 (可以不指定,指定之后就锁定进入Agent之后的Chain的模型,不指定就是LLM_MODEL)
+# 要运行的 LLM 名称,可以包括本地模型和在线模型。
+# 第一个将作为 API 和 WEBUI 的默认模型
+LLM_MODELS = ["chatglm2-6b-int4", "zhipu-api", "openai-api"]
+
+# AgentLM模型的名称 (可以不指定,指定之后就锁定进入Agent之后的Chain的模型,不指定就是LLM_MODELS[0])
 Agent_MODEL = None

 # LLM 运行设备。设为"auto"会自动检测,也可手动设定为"cuda","mps","cpu"其中之一。
@@ -111,7 +30,6 @@ LLM_DEVICE = "auto"
 HISTORY_LEN = 3

 # 大模型最长支持的长度,如果不填写,则使用模型默认的最大长度,如果填写,则为用户设定的最大长度
-
 MAX_TOKENS = None

 # LLM通用对话参数

@@ -197,6 +115,93 @@ ONLINE_LLM_MODEL = {
     },
 }

+# 在以下字典中修改属性值,以指定本地embedding模型存储位置。支持3种设置方法:
+# 1、将对应的值修改为模型绝对路径
+# 2、不修改此处的值(以 text2vec 为例):
+#       2.1 如果{MODEL_ROOT_PATH}下存在如下任一子目录:
+#           - text2vec
+#           - GanymedeNil/text2vec-large-chinese
+#           - text2vec-large-chinese
+#       2.2 如果以上本地路径不存在,则使用huggingface模型
+MODEL_PATH = {
+    "embed_model": {
+        "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+        "ernie-base": "nghuyong/ernie-3.0-base-zh",
+        "text2vec-base": "shibing624/text2vec-base-chinese",
+        "text2vec": "GanymedeNil/text2vec-large-chinese",
+        "text2vec-paraphrase": "shibing624/text2vec-base-chinese-paraphrase",
+        "text2vec-sentence": "shibing624/text2vec-base-chinese-sentence",
+        "text2vec-multilingual": "shibing624/text2vec-base-multilingual",
+        "text2vec-bge-large-chinese": "shibing624/text2vec-bge-large-chinese",
+        "m3e-small": "moka-ai/m3e-small",
+        "m3e-base": "moka-ai/m3e-base",
+        "m3e-large": "moka-ai/m3e-large",
+        "bge-small-zh": "BAAI/bge-small-zh",
+        "bge-base-zh": "BAAI/bge-base-zh",
+        "bge-large-zh": "BAAI/bge-large-zh",
+        "bge-large-zh-noinstruct": "BAAI/bge-large-zh-noinstruct",
+        "bge-base-zh-v1.5": "BAAI/bge-base-zh-v1.5",
+        "bge-large-zh-v1.5": "BAAI/bge-large-zh-v1.5",
+        "piccolo-base-zh": "sensenova/piccolo-base-zh",
+        "piccolo-large-zh": "sensenova/piccolo-large-zh",
+        "text-embedding-ada-002": "your OPENAI_API_KEY",
+    },
+
+    "llm_model": {
+        # 以下部分模型并未完全测试,仅根据fastchat和vllm模型的模型列表推定支持
+        "chatglm2-6b": "THUDM/chatglm2-6b",
+        "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k",
+        "chatglm3-6b": "THUDM/chatglm3-6b-32k",
+        "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k",
+
+        "baichuan2-13b": "baichuan-inc/Baichuan2-13B-Chat",
+        "baichuan2-7b": "baichuan-inc/Baichuan2-7B-Chat",
+
+        "baichuan-7b": "baichuan-inc/Baichuan-7B",
+        "baichuan-13b": "baichuan-inc/Baichuan-13B",
+        'baichuan-13b-chat': 'baichuan-inc/Baichuan-13B-Chat',
+
+        "aquila-7b": "BAAI/Aquila-7B",
+        "aquilachat-7b": "BAAI/AquilaChat-7B",
+
+        "internlm-7b": "internlm/internlm-7b",
+        "internlm-chat-7b": "internlm/internlm-chat-7b",
+
+        "falcon-7b": "tiiuae/falcon-7b",
+        "falcon-40b": "tiiuae/falcon-40b",
+        "falcon-rw-7b": "tiiuae/falcon-rw-7b",
+
+        "gpt2": "gpt2",
+        "gpt2-xl": "gpt2-xl",
+
+        "gpt-j-6b": "EleutherAI/gpt-j-6b",
+        "gpt4all-j": "nomic-ai/gpt4all-j",
+        "gpt-neox-20b": "EleutherAI/gpt-neox-20b",
+        "pythia-12b": "EleutherAI/pythia-12b",
+        "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
+        "dolly-v2-12b": "databricks/dolly-v2-12b",
+        "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b",
+
+        "Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf",
+        "Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
+        "open_llama_13b": "openlm-research/open_llama_13b",
+        "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3",
+        "koala": "young-geng/koala",
+
+        "mpt-7b": "mosaicml/mpt-7b",
+        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
+        "mpt-30b": "mosaicml/mpt-30b",
+        "opt-66b": "facebook/opt-66b",
+        "opt-iml-max-30b": "facebook/opt-iml-max-30b",
+
+        "Qwen-7B": "Qwen/Qwen-7B",
+        "Qwen-14B": "Qwen/Qwen-14B",
+        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
+        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
+    },
+}

 # 通常情况下不需要更改以下内容

 # nltk 模型存储路径
@@ -31,8 +31,7 @@ FSCHAT_OPENAI_API = {

 # fastchat model_worker server
 # 这些模型必须是在model_config.MODEL_PATH或ONLINE_MODEL中正确配置的。
-# 在启动startup.py时,可用通过`--model-worker --model-name xxxx`指定模型,不指定则为LLM_MODEL
-# 必须在这里添加的模型才会出现在WEBUI中可选模型列表里(LLM_MODEL会自动添加)
+# 在启动startup.py时,可用通过`--model-name xxxx yyyy`指定模型,不指定则为LLM_MODELS
 FSCHAT_MODEL_WORKERS = {
     # 所有模型共用的默认配置,可在模型专项配置中进行覆盖。
     "default": {

@@ -58,7 +57,7 @@ FSCHAT_MODEL_WORKERS = {
         # "awq_ckpt": None,
         # "awq_wbits": 16,
         # "awq_groupsize": -1,
-        # "model_names": [LLM_MODEL],
+        # "model_names": LLM_MODELS,
         # "conv_template": None,
         # "limit_worker_concurrency": 5,
         # "stream_interval": 2,
@@ -96,30 +95,31 @@ FSCHAT_MODEL_WORKERS = {
     # "device": "cpu",
     # },

-    "zhipu-api": { # 请为每个要运行的在线API设置不同的端口
+    # 以下配置可以不用修改,在model_config中设置启动的模型
+    "zhipu-api": {
         "port": 21001,
     },
-    # "minimax-api": {
-    #     "port": 21002,
-    # },
+    "minimax-api": {
+        "port": 21002,
+    },
-    # "xinghuo-api": {
-    #     "port": 21003,
-    # },
+    "xinghuo-api": {
+        "port": 21003,
+    },
-    # "qianfan-api": {
-    #     "port": 21004,
-    # },
+    "qianfan-api": {
+        "port": 21004,
+    },
-    # "fangzhou-api": {
-    #     "port": 21005,
-    # },
+    "fangzhou-api": {
+        "port": 21005,
+    },
-    # "qwen-api": {
-    #     "port": 21006,
-    # },
+    "qwen-api": {
+        "port": 21006,
+    },
-    # "baichuan-api": {
-    #     "port": 21007,
-    # },
+    "baichuan-api": {
+        "port": 21007,
+    },
-    # "azure-api": {
-    #     "port": 21008,
-    # },
+    "azure-api": {
+        "port": 21008,
+    },
 }

 # fastchat multi model worker server
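Each online-API entry above only sets a port; at runtime the per-model dict is overlaid on the shared "default" block. A rough sketch of that merge under that assumption (resolve_worker_config is a hypothetical name; the repo's own helper, get_model_worker_config in server/utils.py, does more, e.g. pulling in online-API credentials):

from configs.server_config import FSCHAT_MODEL_WORKERS

def resolve_worker_config(model_name: str) -> dict:
    # Shared settings first, then the per-model overrides such as "port".
    config = dict(FSCHAT_MODEL_WORKERS.get("default", {}))
    config.update(FSCHAT_MODEL_WORKERS.get(model_name, {}))
    return config

# e.g. resolve_worker_config("zhipu-api")["port"] == 21001 with the config above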
knowledge_base/samples/content/wiki (new submodule, 1 line)
@@ -0,0 +1 @@
+Subproject commit b705cf80e4150cb900c77b343f0f9c62ec9a0278
@@ -53,7 +53,7 @@ vllm>=0.2.0; sys_platform == "linux"

 # WebUI requirements

-streamlit>=1.26.0
+streamlit~=1.27.0
 streamlit-option-menu>=0.3.6
 streamlit-antd-components>=0.1.11
 streamlit-chatbox>=1.1.11

@@ -41,7 +41,7 @@ dashscope>=1.10.0 # qwen

 numpy~=1.24.4
 pandas~=2.0.3
-streamlit>=1.26.0
+streamlit~=1.27.0
 streamlit-option-menu>=0.3.6
 streamlit-antd-components>=0.1.11
 streamlit-chatbox==1.1.11

@@ -1,6 +1,6 @@
 # WebUI requirements

-streamlit>=1.26.0
+streamlit~=1.27.0
 streamlit-option-menu>=0.3.6
 streamlit-antd-components>=0.1.11
 streamlit-chatbox>=1.1.11
@@ -5,7 +5,7 @@ from langchain.agents import AgentExecutor, LLMSingleActionAgent, initialize_age
 from server.agent.custom_template import CustomOutputParser, CustomPromptTemplate
 from fastapi import Body
 from fastapi.responses import StreamingResponse
-from configs import LLM_MODEL, TEMPERATURE, HISTORY_LEN, Agent_MODEL
+from configs import LLM_MODELS, TEMPERATURE, HISTORY_LEN, Agent_MODEL
 from server.utils import wrap_done, get_ChatOpenAI, get_prompt_template
 from langchain.chains import LLMChain
 from typing import AsyncIterable, Optional, Dict

@@ -26,7 +26,7 @@ async def agent_chat(query: str = Body(..., description="用户输入", examples
                          "content": "使用天气查询工具查询到今天北京多云,10-14摄氏度,东北风2级,易感冒"}]]
                      ),
                      stream: bool = Body(False, description="流式输出"),
-                     model_name: str = Body(LLM_MODEL, description="LLM 模型名称。"),
+                     model_name: str = Body(LLM_MODELS[0], description="LLM 模型名称。"),
                      temperature: float = Body(TEMPERATURE, description="LLM 采样温度", ge=0.0, le=1.0),
                      max_tokens: Optional[int] = Body(None, description="限制LLM生成Token数量,默认None代表模型最大值"),
                      prompt_name: str = Body("default",

@@ -38,7 +38,7 @@ async def agent_chat(query: str = Body(..., description="用户输入", examples
     async def agent_chat_iterator(
             query: str,
             history: Optional[List[History]],
-            model_name: str = LLM_MODEL,
+            model_name: str = LLM_MODELS[0],
             prompt_name: str = prompt_name,
     ) -> AsyncIterable[str]:
         callback = CustomAsyncIteratorCallbackHandler()

@@ -1,6 +1,6 @@
 from fastapi import Body
 from fastapi.responses import StreamingResponse
-from configs import LLM_MODEL, TEMPERATURE, SAVE_CHAT_HISTORY
+from configs import LLM_MODELS, TEMPERATURE, SAVE_CHAT_HISTORY
 from server.utils import wrap_done, get_ChatOpenAI
 from langchain.chains import LLMChain
 from langchain.callbacks import AsyncIteratorCallbackHandler

@@ -22,7 +22,7 @@ async def chat(query: str = Body(..., description="用户输入", examples=["恼
                    {"role": "assistant", "content": "虎头虎脑"}]]
                ),
                stream: bool = Body(False, description="流式输出"),
-               model_name: str = Body(LLM_MODEL, description="LLM 模型名称。"),
+               model_name: str = Body(LLM_MODELS[0], description="LLM 模型名称。"),
                temperature: float = Body(TEMPERATURE, description="LLM 采样温度", ge=0.0, le=1.0),
                max_tokens: Optional[int] = Body(None, description="限制LLM生成Token数量,默认None代表模型最大值"),
                # top_p: float = Body(TOP_P, description="LLM 核采样。勿与temperature同时设置", gt=0.0, lt=1.0),

@@ -32,7 +32,7 @@ async def chat(query: str = Body(..., description="用户输入", examples=["恼

     async def chat_iterator(query: str,
                             history: List[History] = [],
-                            model_name: str = LLM_MODEL,
+                            model_name: str = LLM_MODELS[0],
                             prompt_name: str = prompt_name,
                             ) -> AsyncIterable[str]:
         callback = AsyncIteratorCallbackHandler()

@@ -1,6 +1,6 @@
 from fastapi import Body
 from fastapi.responses import StreamingResponse
-from configs import LLM_MODEL, TEMPERATURE
+from configs import LLM_MODELS, TEMPERATURE
 from server.utils import wrap_done, get_OpenAI
 from langchain.chains import LLMChain
 from langchain.callbacks import AsyncIteratorCallbackHandler

@@ -13,7 +13,7 @@ from server.utils import get_prompt_template
 async def completion(query: str = Body(..., description="用户输入", examples=["恼羞成怒"]),
                      stream: bool = Body(False, description="流式输出"),
                      echo: bool = Body(False, description="除了输出之外,还回显输入"),
-                     model_name: str = Body(LLM_MODEL, description="LLM 模型名称。"),
+                     model_name: str = Body(LLM_MODELS[0], description="LLM 模型名称。"),
                      temperature: float = Body(TEMPERATURE, description="LLM 采样温度", ge=0.0, le=1.0),
                      max_tokens: Optional[int] = Body(1024, description="限制LLM生成Token数量,默认None代表模型最大值"),
                      # top_p: float = Body(TOP_P, description="LLM 核采样。勿与temperature同时设置", gt=0.0, lt=1.0),

@@ -23,7 +23,7 @@ async def completion(query: str = Body(..., description="用户输入", examples

     #todo 因ApiModelWorker 默认是按chat处理的,会对params["prompt"] 解析为messages,因此ApiModelWorker 使用时需要有相应处理
     async def completion_iterator(query: str,
-                                  model_name: str = LLM_MODEL,
+                                  model_name: str = LLM_MODELS[0],
                                   prompt_name: str = prompt_name,
                                   echo: bool = echo,
                                   ) -> AsyncIterable[str]:

@@ -1,6 +1,6 @@
 from fastapi import Body, Request
 from fastapi.responses import StreamingResponse
-from configs import (LLM_MODEL, VECTOR_SEARCH_TOP_K, SCORE_THRESHOLD, TEMPERATURE)
+from configs import (LLM_MODELS, VECTOR_SEARCH_TOP_K, SCORE_THRESHOLD, TEMPERATURE)
 from server.utils import wrap_done, get_ChatOpenAI
 from server.utils import BaseResponse, get_prompt_template
 from langchain.chains import LLMChain

@@ -30,7 +30,7 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
                                       "content": "虎头虎脑"}]]
                                  ),
                              stream: bool = Body(False, description="流式输出"),
-                             model_name: str = Body(LLM_MODEL, description="LLM 模型名称。"),
+                             model_name: str = Body(LLM_MODELS[0], description="LLM 模型名称。"),
                              temperature: float = Body(TEMPERATURE, description="LLM 采样温度", ge=0.0, le=1.0),
                              max_tokens: Optional[int] = Body(None, description="限制LLM生成Token数量,默认None代表模型最大值"),
                              prompt_name: str = Body("default", description="使用的prompt模板名称(在configs/prompt_config.py中配置)"),

@@ -45,7 +45,7 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
     async def knowledge_base_chat_iterator(query: str,
                                            top_k: int,
                                            history: Optional[List[History]],
-                                           model_name: str = LLM_MODEL,
+                                           model_name: str = LLM_MODELS[0],
                                            prompt_name: str = prompt_name,
                                            ) -> AsyncIterable[str]:
         callback = AsyncIteratorCallbackHandler()

@@ -1,7 +1,7 @@
 from fastapi.responses import StreamingResponse
 from typing import List, Optional
 import openai
-from configs import LLM_MODEL, logger, log_verbose
+from configs import LLM_MODELS, logger, log_verbose
 from server.utils import get_model_worker_config, fschat_openai_api_address
 from pydantic import BaseModel


@@ -12,7 +12,7 @@ class OpenAiMessage(BaseModel):


 class OpenAiChatMsgIn(BaseModel):
-    model: str = LLM_MODEL
+    model: str = LLM_MODELS[0]
     messages: List[OpenAiMessage]
     temperature: float = 0.7
     n: int = 1

@@ -1,7 +1,7 @@
 from langchain.utilities.bing_search import BingSearchAPIWrapper
 from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
 from configs import (BING_SEARCH_URL, BING_SUBSCRIPTION_KEY, METAPHOR_API_KEY,
-                     LLM_MODEL, SEARCH_ENGINE_TOP_K, TEMPERATURE,
+                     LLM_MODELS, SEARCH_ENGINE_TOP_K, TEMPERATURE,
                      TEXT_SPLITTER_NAME, OVERLAP_SIZE)
 from fastapi import Body
 from fastapi.responses import StreamingResponse

@@ -126,7 +126,7 @@ async def search_engine_chat(query: str = Body(..., description="用户输入",
                                      "content": "虎头虎脑"}]]
                                 ),
                             stream: bool = Body(False, description="流式输出"),
-                            model_name: str = Body(LLM_MODEL, description="LLM 模型名称。"),
+                            model_name: str = Body(LLM_MODELS[0], description="LLM 模型名称。"),
                             temperature: float = Body(TEMPERATURE, description="LLM 采样温度", ge=0.0, le=1.0),
                             max_tokens: Optional[int] = Body(None, description="限制LLM生成Token数量,默认None代表模型最大值"),
                             prompt_name: str = Body("default",description="使用的prompt模板名称(在configs/prompt_config.py中配置)"),

@@ -144,7 +144,7 @@ async def search_engine_chat(query: str = Body(..., description="用户输入",
                                            search_engine_name: str,
                                            top_k: int,
                                            history: Optional[List[History]],
-                                           model_name: str = LLM_MODEL,
+                                           model_name: str = LLM_MODELS[0],
                                            prompt_name: str = prompt_name,
                                            ) -> AsyncIterable[str]:
         callback = AsyncIteratorCallbackHandler()
@@ -48,7 +48,10 @@ class FaissKBService(KBService):

     def do_drop_kb(self):
         self.clear_vs()
+        try:
             shutil.rmtree(self.kb_path)
+        except Exception:
+            ...

     def do_search(self,
                   query: str,

@@ -90,8 +93,11 @@ class FaissKBService(KBService):
     def do_clear_vs(self):
         with kb_faiss_pool.atomic:
             kb_faiss_pool.pop((self.kb_name, self.vector_name))
+        try:
             shutil.rmtree(self.vs_path)
-        os.makedirs(self.vs_path)
+        except Exception:
+            ...
+        os.makedirs(self.vs_path, exist_ok=True)

     def exist_doc(self, file_name: str):
         if super().exist_doc(file_name):
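Both FAISS methods now follow the same tolerant-delete pattern: removing a store directory must not fail when it is already gone, and recreating it must not fail when it already exists. A small self-contained sketch of that pattern, with a placeholder path:

import os
import shutil

def reset_dir(path: str) -> None:
    # Ignore a missing directory instead of raising FileNotFoundError.
    try:
        shutil.rmtree(path)
    except Exception:
        ...
    # exist_ok=True also covers the case where something recreated it first.
    os.makedirs(path, exist_ok=True)

reset_dir("knowledge_base/samples/vector_store")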
@@ -7,7 +7,7 @@ from configs (
     logger,
     log_verbose,
     text_splitter_dict,
-    LLM_MODEL,
+    LLM_MODELS,
     TEXT_SPLITTER_NAME,
 )
 import importlib

@@ -57,7 +57,8 @@ def list_files_from_folder(kb_name: str):
     for root, _, files in os.walk(doc_path):
         tail = os.path.basename(root).lower()
         if (tail.startswith("temp")
-                or tail.startswith("tmp")): # 跳过 temp 或 tmp 开头的文件夹
+                or tail.startswith("tmp")
+                or tail.startswith(".")): # 跳过 [temp, tmp, .] 开头的文件夹
             continue
         for file in files:
             if file.startswith("~$"): # 跳过 ~$ 开头的文件
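This directory filter implements the "skip files starting with ." item from the commit message: hidden folders (for example the wiki submodule's .git metadata) are no longer listed as knowledge base files. A minimal standalone sketch of the same filtering rule, not the repository's actual function:

import os

def list_visible_files(doc_path: str) -> list:
    result = []
    for root, _, files in os.walk(doc_path):
        tail = os.path.basename(root).lower()
        # Skip folders whose names start with temp, tmp or "." (e.g. .git).
        if tail.startswith(("temp", "tmp", ".")):
            continue
        for file in files:
            if file.startswith("~$"):  # skip Office lock files
                continue
            result.append(os.path.join(root, file))
    return result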
@@ -192,7 +193,7 @@ def make_text_splitter(
         splitter_name: str = TEXT_SPLITTER_NAME,
         chunk_size: int = CHUNK_SIZE,
         chunk_overlap: int = OVERLAP_SIZE,
-        llm_model: str = LLM_MODEL,
+        llm_model: str = LLM_MODELS[0],
 ):
     """
     根据参数获取特定的分词器
@@ -1,5 +1,5 @@
 from fastapi import Body
-from configs import logger, log_verbose, LLM_MODEL, HTTPX_DEFAULT_TIMEOUT
+from configs import logger, log_verbose, LLM_MODELS, HTTPX_DEFAULT_TIMEOUT
 from server.utils import (BaseResponse, fschat_controller_address, list_config_llm_models,
                           get_httpx_client, get_model_worker_config)
 from copy import deepcopy

@@ -65,7 +65,7 @@ def get_model_config(


 def stop_llm_model(
-    model_name: str = Body(..., description="要停止的LLM模型名称", examples=[LLM_MODEL]),
+    model_name: str = Body(..., description="要停止的LLM模型名称", examples=[LLM_MODELS[0]]),
     controller_address: str = Body(None, description="Fastchat controller服务器地址", examples=[fschat_controller_address()])
 ) -> BaseResponse:
     '''

@@ -89,8 +89,8 @@ def stop_llm_model(


 def change_llm_model(
-    model_name: str = Body(..., description="当前运行模型", examples=[LLM_MODEL]),
-    new_model_name: str = Body(..., description="要切换的新模型", examples=[LLM_MODEL]),
+    model_name: str = Body(..., description="当前运行模型", examples=[LLM_MODELS[0]]),
+    new_model_name: str = Body(..., description="要切换的新模型", examples=[LLM_MODELS[0]]),
     controller_address: str = Body(None, description="Fastchat controller服务器地址", examples=[fschat_controller_address()])
 ):
     '''
@@ -59,16 +59,22 @@ class QwenWorker(ApiModelWorker):
         import dashscope
         params.load_config(self.model_names[0])

+        result = []
+        i = 0
+        while i < len(params.texts):
+            texts = params.texts[i:i+25]
             resp = dashscope.TextEmbedding.call(
                 model=params.embed_model or self.DEFAULT_EMBED_MODEL,
-                input=params.texts, # 最大25行
+                input=texts, # 最大25行
                 api_key=params.api_key,
             )
             if resp["status_code"] != 200:
                 return {"code": resp["status_code"], "msg": resp.message}
             else:
                 embeddings = [x["embedding"] for x in resp["output"]["embeddings"]]
-                return {"code": 200, "data": embeddings}
+                result += embeddings
+                i += 25
+        return {"code": 200, "data": result}

     def get_embeddings(self, params):
         # TODO: 支持embeddings
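This is the workaround for the Qwen/DashScope embedding endpoint accepting at most 25 texts per request: the worker now slices the input into 25-item batches and concatenates the returned vectors. A simplified sketch of the batching loop; embed_batch is a stand-in for the actual dashscope.TextEmbedding.call:

from typing import Callable, List

MAX_BATCH = 25  # per-request limit of the Qwen online embedding API

def embed_in_batches(texts: List[str],
                     embed_batch: Callable[[List[str]], List[List[float]]]) -> List[List[float]]:
    result: List[List[float]] = []
    for start in range(0, len(texts), MAX_BATCH):
        # Each call sees at most 25 texts, mirroring the worker's while loop above.
        result += embed_batch(texts[start:start + MAX_BATCH])
    return result

# Example with a dummy embedder that returns one zero-vector per text:
vectors = embed_in_batches([f"doc {i}" for i in range(60)],
                           lambda batch: [[0.0] * 4 for _ in batch])
assert len(vectors) == 60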
@@ -4,7 +4,7 @@ from typing import List
 from fastapi import FastAPI
 from pathlib import Path
 import asyncio
-from configs import (LLM_MODEL, LLM_DEVICE, EMBEDDING_DEVICE,
+from configs import (LLM_MODELS, LLM_DEVICE, EMBEDDING_DEVICE,
                      MODEL_PATH, MODEL_ROOT_PATH, ONLINE_LLM_MODEL, logger, log_verbose,
                      FSCHAT_MODEL_WORKERS, HTTPX_DEFAULT_TIMEOUT)
 import os

@@ -345,8 +345,7 @@ def list_config_llm_models() -> Dict[str, Dict]:
     return [(model_name, config_type), ...]
     '''
     workers = list(FSCHAT_MODEL_WORKERS)
-    if LLM_MODEL not in workers:
-        workers.insert(0, LLM_MODEL)
     return {
         "local": MODEL_PATH["llm_model"],
         "online": ONLINE_LLM_MODEL,

@@ -431,7 +430,7 @@ def fschat_controller_address() -> str:
     return f"http://{host}:{port}"


-def fschat_model_worker_address(model_name: str = LLM_MODEL) -> str:
+def fschat_model_worker_address(model_name: str = LLM_MODELS[0]) -> str:
     if model := get_model_worker_config(model_name):  # TODO: depends fastchat
         host = model["host"]
         if host == "0.0.0.0":

@@ -660,7 +659,7 @@ def get_server_configs() -> Dict:
         TEXT_SPLITTER_NAME,
     )
     from configs.model_config import (
-        LLM_MODEL,
+        LLM_MODELS,
         HISTORY_LEN,
         TEMPERATURE,
     )
startup.py (14 lines changed)
@@ -22,7 +22,7 @@ from configs (
     LOG_PATH,
     log_verbose,
     logger,
-    LLM_MODEL,
+    LLM_MODELS,
     EMBEDDING_MODEL,
     TEXT_SPLITTER_NAME,
     FSCHAT_CONTROLLER,

@@ -359,7 +359,7 @@ def run_controller(log_level: str = "INFO", started_event: mp.Event = None):


 def run_model_worker(
-        model_name: str = LLM_MODEL,
+        model_name: str = LLM_MODELS[0],
         controller_address: str = "",
         log_level: str = "INFO",
         q: mp.Queue = None,

@@ -496,7 +496,7 @@ def parse_args() -> argparse.ArgumentParser:
         "--model-worker",
         action="store_true",
         help="run fastchat's model_worker server with specified model name. "
-             "specify --model-name if not using default LLM_MODEL",
+             "specify --model-name if not using default LLM_MODELS",
         dest="model_worker",
     )
     parser.add_argument(
@@ -504,7 +504,7 @@ def parse_args() -> argparse.ArgumentParser:
         "--model-name",
         type=str,
         nargs="+",
-        default=[LLM_MODEL],
+        default=LLM_MODELS,
         help="specify model name for model worker. "
              "add addition names with space seperated to start multiple model workers.",
         dest="model_name",
@@ -568,7 +568,7 @@ def dump_server_info(after_start=False, args=None):
     print(f"langchain版本:{langchain.__version__}. fastchat版本:{fastchat.__version__}")
     print("\n")

-    models = [LLM_MODEL]
+    models = LLM_MODELS
     if args and args.model_name:
         models = args.model_name

@@ -694,8 +694,8 @@ async def start_main_server():
             processes["model_worker"][model_name] = process

         if args.api_worker:
-            configs = get_all_model_worker_configs()
-            for model_name, config in configs.items():
+            for model_name in args.model_name:
+                config = get_model_worker_config(model_name)
                 if (config.get("online_api")
                         and config.get("worker_class")
                         and model_name in FSCHAT_MODEL_WORKERS):
@@ -1,7 +1,7 @@
 import sys
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-from configs import LLM_MODEL, TEMPERATURE
+from configs import LLM_MODELS, TEMPERATURE
 from server.utils import get_ChatOpenAI
 from langchain.chains import LLMChain
 from langchain.agents import LLMSingleActionAgent, AgentExecutor

@@ -10,7 +10,7 @@ from langchain.memory import ConversationBufferWindowMemory

 memory = ConversationBufferWindowMemory(k=5)
 model = get_ChatOpenAI(
-    model_name=LLM_MODEL,
+    model_name=LLM_MODELS[0],
     temperature=TEMPERATURE,
 )
 from server.agent.custom_template import CustomOutputParser, prompt

@@ -6,7 +6,6 @@ from pathlib import Path
 root_path = Path(__file__).parent.parent.parent
 sys.path.append(str(root_path))
 from configs.server_config import FSCHAT_MODEL_WORKERS
-from configs.model_config import LLM_MODEL
 from server.utils import api_address, get_model_worker_config

 from pprint import pprint
@@ -8,7 +8,7 @@ from pathlib import Path
 from configs import (
     EMBEDDING_MODEL,
     DEFAULT_VS_TYPE,
-    LLM_MODEL,
+    LLM_MODELS,
     TEMPERATURE,
     SCORE_THRESHOLD,
     CHUNK_SIZE,

@@ -259,7 +259,7 @@ class ApiRequest:
         self,
         messages: List[Dict],
         stream: bool = True,
-        model: str = LLM_MODEL,
+        model: str = LLM_MODELS[0],
         temperature: float = TEMPERATURE,
         max_tokens: int = None,
         **kwargs: Any,

@@ -291,7 +291,7 @@ class ApiRequest:
         query: str,
         history: List[Dict] = [],
         stream: bool = True,
-        model: str = LLM_MODEL,
+        model: str = LLM_MODELS[0],
         temperature: float = TEMPERATURE,
         max_tokens: int = None,
         prompt_name: str = "default",

@@ -321,7 +321,7 @@ class ApiRequest:
         query: str,
         history: List[Dict] = [],
         stream: bool = True,
-        model: str = LLM_MODEL,
+        model: str = LLM_MODELS[0],
         temperature: float = TEMPERATURE,
         max_tokens: int = None,
         prompt_name: str = "default",

@@ -353,7 +353,7 @@ class ApiRequest:
         score_threshold: float = SCORE_THRESHOLD,
         history: List[Dict] = [],
         stream: bool = True,
-        model: str = LLM_MODEL,
+        model: str = LLM_MODELS[0],
         temperature: float = TEMPERATURE,
         max_tokens: int = None,
         prompt_name: str = "default",

@@ -391,7 +391,7 @@ class ApiRequest:
         top_k: int = SEARCH_ENGINE_TOP_K,
         history: List[Dict] = [],
         stream: bool = True,
-        model: str = LLM_MODEL,
+        model: str = LLM_MODELS[0],
         temperature: float = TEMPERATURE,
         max_tokens: int = None,
         prompt_name: str = "default",
@@ -677,9 +677,10 @@ class ApiRequest:
         return self._get_response_value(response, as_json=True, value_func=lambda r:r.get("data", []))


-    def get_default_llm_model(self) -> Tuple[str, bool]:
+    def get_default_llm_model(self, local_first: bool = True) -> Tuple[str, bool]:
         '''
-        从服务器上获取当前运行的LLM模型,如果本机配置的LLM_MODEL属于本地模型且在其中,则优先返回
+        从服务器上获取当前运行的LLM模型。
+        当 local_first=True 时,优先返回运行中的本地模型,否则优先按LLM_MODELS配置顺序返回。
         返回类型为(model_name, is_local_model)
         '''
         def ret_sync():

@@ -687,26 +688,42 @@ class ApiRequest:
             if not running_models:
                 return "", False

-            if LLM_MODEL in running_models:
-                return LLM_MODEL, True
+            model = ""
+            for m in LLM_MODELS:
+                if m not in running_models:
+                    continue
+                is_local = not running_models[m].get("online_api")
+                if local_first and not is_local:
+                    continue
+                else:
+                    model = m
+                    break

-            local_models = [k for k, v in running_models.items() if not v.get("online_api")]
-            if local_models:
-                return local_models[0], True
-            return list(running_models)[0], False
+            if not model:  # LLM_MODELS中配置的模型都不在running_models里
+                model = list(running_models)[0]
+            is_local = not running_models[model].get("online_api")
+            return model, is_local

         async def ret_async():
             running_models = await self.list_running_models()
             if not running_models:
                 return "", False

-            if LLM_MODEL in running_models:
-                return LLM_MODEL, True
+            model = ""
+            for m in LLM_MODELS:
+                if m not in running_models:
+                    continue
+                is_local = not running_models[m].get("online_api")
+                if local_first and not is_local:
+                    continue
+                else:
+                    model = m
+                    break

-            local_models = [k for k, v in running_models.items() if not v.get("online_api")]
-            if local_models:
-                return local_models[0], True
-            return list(running_models)[0], False
+            if not model:  # LLM_MODELS中配置的模型都不在running_models里
+                model = list(running_models)[0]
+            is_local = not running_models[model].get("online_api")
+            return model, is_local

         if self._use_async:
             return ret_async()
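In short, get_default_llm_model now walks LLM_MODELS in configured order, prefers a running local model when local_first is true, and falls back to the first running model otherwise. A standalone sketch of the selection rule with a fake running_models mapping (the real method queries the API server, synchronously or asynchronously):

from typing import Dict, Tuple

LLM_MODELS = ["chatglm2-6b-int4", "zhipu-api", "openai-api"]

def pick_default_model(running_models: Dict[str, dict], local_first: bool = True) -> Tuple[str, bool]:
    if not running_models:
        return "", False
    model = ""
    for m in LLM_MODELS:
        if m not in running_models:
            continue
        is_local = not running_models[m].get("online_api")
        if local_first and not is_local:
            continue
        model = m
        break
    if not model:  # none of the configured models is running
        model = list(running_models)[0]
    is_local = not running_models[model].get("online_api")
    return model, is_local

# Only the online zhipu-api worker is up, so local_first falls back to it anyway:
print(pick_default_model({"zhipu-api": {"online_api": True}}))  # ('zhipu-api', False)
# A running local model wins over a running online one:
print(pick_default_model({"chatglm2-6b-int4": {}, "zhipu-api": {"online_api": True}}))  # ('chatglm2-6b-int4', True)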