"""Knowledge-base settings: vector store, text splitting, search and storage paths."""
import os

# Name of the knowledge base used by default.
DEFAULT_KNOWLEDGE_BASE = "samples"

# Default vector store / full-text search engine type.
# Options: faiss, milvus (offline) & zilliz (online), pgvector, and the
# full-text search engine es.
# NOTE(review): the pgvector entry in kbs_config below is keyed "pg" — confirm
# which spelling the consumer of this setting expects.
DEFAULT_VS_TYPE = "faiss"

# Number of vector stores to keep cached (FAISS only).
CACHED_VS_NUM = 1

# Number of temporary vector stores to keep cached (FAISS only); used for
# file chat.
CACHED_MEMO_VS_NUM = 10

# Length of a single text chunk in the knowledge base
# (not applicable to MarkdownHeaderTextSplitter).
CHUNK_SIZE = 250

# Overlap length between adjacent text chunks in the knowledge base
# (not applicable to MarkdownHeaderTextSplitter).
OVERLAP_SIZE = 50

# Number of vectors to match in a knowledge-base search.
VECTOR_SEARCH_TOP_K = 3

# Relevance threshold for knowledge-base matching, in the range 0-1.
# A smaller score means higher relevance; 1 is equivalent to no filtering.
# A value around 0.5 is recommended.
SCORE_THRESHOLD = 1

# Default search engine. Options: bing, duckduckgo, metaphor.
DEFAULT_SEARCH_ENGINE = "duckduckgo"

# Number of search-engine results to match.
SEARCH_ENGINE_TOP_K = 3

# NOTE(review): presumably toggles title enhancement for Chinese documents —
# confirm against the code that reads this flag.
ZH_TITLE_ENHANCE = False

# Introductory description for each knowledge base, shown when the knowledge
# base is initialized and used for Agent calls. A knowledge base without an
# entry has no description and will not be called by the Agent.
KB_INFO = {
    "samples": "关于本项目issue的解答",
}

# Normally nothing below this line needs to be changed.

# Default storage path for knowledge bases: a "knowledge_base" directory next
# to the directory that contains this file.
KB_ROOT_PATH = os.path.join(
    os.path.dirname(os.path.dirname(__file__)), "knowledge_base"
)
# Create the directory at import time. exist_ok=True avoids the
# check-then-create race when several processes import this module at once.
os.makedirs(KB_ROOT_PATH, exist_ok=True)

# Default storage path for the database.
# When using sqlite, DB_ROOT_PATH can be changed directly; for any other
# database, change SQLALCHEMY_DATABASE_URI directly instead.
DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}"

# Available vector store types and their connection settings.
kbs_config = {
    "faiss": {},
    "milvus": {
        "host": "127.0.0.1",
        "port": "19530",
        "user": "",
        "password": "",
        "secure": False,
    },
    "zilliz": {
        "host": "in01-a7ce524e41e3935.ali-cn-hangzhou.vectordb.zilliz.com.cn",
        "port": "19530",
        "user": "",
        "password": "",
        "secure": True,
    },
    "pg": {
        "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat",
    },
    "es": {
        "host": "127.0.0.1",
        "port": "9200",
        "index_name": "test_index",
        "user": "",
        "password": "",
    },
}

# TextSplitter configuration. Do not modify it unless you understand what
# each entry means.
text_splitter_dict = {
    "ChineseRecursiveTextSplitter": {
        # Choosing "tiktoken" here uses OpenAI's method instead.
        "source": "huggingface",
        "tokenizer_name_or_path": "",
    },
    "SpacyTextSplitter": {
        "source": "huggingface",
        "tokenizer_name_or_path": "gpt2",
    },
    "RecursiveCharacterTextSplitter": {
        "source": "tiktoken",
        "tokenizer_name_or_path": "cl100k_base",
    },
    "MarkdownHeaderTextSplitter": {
        "headers_to_split_on": [
            ("#", "head1"),
            ("##", "head2"),
            ("###", "head3"),
            ("####", "head4"),
        ]
    },
}

# Name of the TEXT_SPLITTER to use (a key of text_splitter_dict).
TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"

# Vocabulary file of custom keywords for the embedding model.
EMBEDDING_KEYWORD_FILE = "embedding_keywords.txt"