mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-01-19 21:37:20 +08:00
改变 Embeddings 模型改为使用框架 API,不再手动加载,删除自定义 Embeddings Keyword 代码 修改依赖文件,移除 torch transformers 等重依赖 暂时移出对 loom 的集成 后续: 1、优化目录结构 2、检查合并中有无被覆盖的 0.2.10 内容
114 lines
3.2 KiB
Plaintext
114 lines
3.2 KiB
Plaintext
import os
|
||
|
||
# Knowledge base used by default.
DEFAULT_KNOWLEDGE_BASE = "samples"

# Default vector store / full-text search engine type.
# Options: faiss, milvus (offline) & zilliz (online), pgvector, and the
# full-text search engine es.
# NOTE(review): the pgvector settings live under the "pg" key of kbs_config —
# confirm which spelling the loader expects before switching to it.
DEFAULT_VS_TYPE = "faiss"

# Number of vector stores kept in cache (FAISS only).
CACHED_VS_NUM = 1

# Number of temporary vector stores kept in cache (FAISS only); these are used
# for file-based chat.
CACHED_MEMO_VS_NUM = 10

# Length of a single text chunk in the knowledge base
# (does not apply to MarkdownHeaderTextSplitter).
CHUNK_SIZE = 250

# Overlap length between adjacent text chunks in the knowledge base
# (does not apply to MarkdownHeaderTextSplitter).
OVERLAP_SIZE = 50

# Number of vectors returned by a knowledge-base match.
VECTOR_SEARCH_TOP_K = 3

# Relevance threshold for knowledge-base matches, in the range 0-1.
# The smaller the SCORE, the higher the relevance; a value of 1 is equivalent
# to no filtering. A setting of around 0.5 is recommended.
SCORE_THRESHOLD = 1

# Default search engine. Options: bing, duckduckgo, metaphor.
DEFAULT_SEARCH_ENGINE = "duckduckgo"

# Number of results returned by a search-engine match.
SEARCH_ENGINE_TOP_K = 3

# NOTE(review): no description is given for this flag here; presumably it
# toggles title enhancement for Chinese documents — confirm against its users.
ZH_TITLE_ENHANCE = False

# Introduction text for each knowledge base, keyed by knowledge-base name.
# It is shown when the knowledge base is initialized and is used for Agent
# calls. A knowledge base without an entry has no introduction and will not be
# called by the Agent.
KB_INFO = {
    "samples": "关于本项目issue的解答",
}

# The settings below normally do not need to be changed.

# Default storage path for knowledge bases: a "knowledge_base" directory placed
# beside the directory that contains this file.
KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
# Create the directory at import time. makedirs(exist_ok=True) replaces the
# former "check os.path.exists, then os.mkdir" pair: it cannot fail when two
# processes import this module at once, and it also creates missing parents.
# It raises FileExistsError if the path exists but is not a directory.
os.makedirs(KB_ROOT_PATH, exist_ok=True)

# Default storage path for the database.
# When using sqlite, DB_ROOT_PATH can be changed directly; for any other
# database, change SQLALCHEMY_DATABASE_URI directly instead.
DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}"

# Available vector store types and the connection settings for each one.
# Keys are the store type names; values are the per-store option dicts.
kbs_config = {
    # FAISS takes no options here.
    "faiss": {
    },
    "milvus": {
        "host": "127.0.0.1",
        "port": "19530",
        "user": "",
        "password": "",
        "secure": False,
    },
    "zilliz": {
        "host": "in01-a7ce524e41e3935.ali-cn-hangzhou.vectordb.zilliz.com.cn",
        "port": "19530",
        "user": "",
        "password": "",
        "secure": True,
    },
    "pg": {
        "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat",
    },

    "es": {
        "host": "127.0.0.1",
        "port": "9200",
        "index_name": "test_index",
        "user": "",
        "password": ""
    }
}

# TextSplitter configuration. Do not modify it unless you understand what the
# options mean.
text_splitter_dict = {
    "ChineseRecursiveTextSplitter": {
        "source": "huggingface",  # choosing tiktoken uses OpenAI's method instead
        "tokenizer_name_or_path": "",
    },
    "SpacyTextSplitter": {
        "source": "huggingface",
        "tokenizer_name_or_path": "gpt2",
    },
    "RecursiveCharacterTextSplitter": {
        "source": "tiktoken",
        "tokenizer_name_or_path": "cl100k_base",
    },
    "MarkdownHeaderTextSplitter": {
        # (markdown header marker, name assigned to that header level) pairs.
        "headers_to_split_on":
            [
                ("#", "head1"),
                ("##", "head2"),
                ("###", "head3"),
                ("####", "head4"),
            ]
    },
}

# Name of the text splitter to use; it matches a key of text_splitter_dict.
TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"

# Vocabulary file holding the custom keywords for the Embedding model.
EMBEDDING_KEYWORD_FILE = "embedding_keywords.txt"