fix:1、index_name获取兼容windows和linux系统 2、补充mappings,创建向量索引结构保持一致 3、添加ES向量库的测试方法

This commit is contained in:
zhengmingsheng 2024-01-18 11:25:27 +08:00
parent b2ea386f60
commit 2ab944996e

View File

@ -6,6 +6,7 @@ from langchain.schema import Document
from langchain.vectorstores.elasticsearch import ElasticsearchStore from langchain.vectorstores.elasticsearch import ElasticsearchStore
from configs import KB_ROOT_PATH, EMBEDDING_MODEL, EMBEDDING_DEVICE, CACHED_VS_NUM from configs import KB_ROOT_PATH, EMBEDDING_MODEL, EMBEDDING_DEVICE, CACHED_VS_NUM
from server.knowledge_base.kb_service.base import KBService, SupportedVSType from server.knowledge_base.kb_service.base import KBService, SupportedVSType
from server.knowledge_base.utils import KnowledgeFile
from server.utils import load_local_embeddings from server.utils import load_local_embeddings
from elasticsearch import Elasticsearch,BadRequestError from elasticsearch import Elasticsearch,BadRequestError
from configs import logger from configs import logger
@ -15,7 +16,7 @@ class ESKBService(KBService):
def do_init(self): def do_init(self):
self.kb_path = self.get_kb_path(self.kb_name) self.kb_path = self.get_kb_path(self.kb_name)
self.index_name = kbs_config[self.vs_type()]['index_name'] self.index_name = os.path.split(self.kb_path)[-1]
self.IP = kbs_config[self.vs_type()]['host'] self.IP = kbs_config[self.vs_type()]['host']
self.PORT = kbs_config[self.vs_type()]['port'] self.PORT = kbs_config[self.vs_type()]['port']
self.user = kbs_config[self.vs_type()].get("user",'') self.user = kbs_config[self.vs_type()].get("user",'')
@ -25,11 +26,11 @@ class ESKBService(KBService):
try: try:
# ES python客户端连接仅连接 # ES python客户端连接仅连接
if self.user != "" and self.password != "": if self.user != "" and self.password != "":
self.es_client_python = Elasticsearch(f"https://{self.IP}:{self.PORT}", self.es_client_python = Elasticsearch(f"http://{self.IP}:{self.PORT}",
basic_auth=(self.user,self.password)) basic_auth=(self.user,self.password))
else: else:
logger.warning("ES未配置用户名和密码") logger.warning("ES未配置用户名和密码")
self.es_client_python = Elasticsearch(f"https://{self.IP}:{self.PORT}") self.es_client_python = Elasticsearch(f"http://{self.IP}:{self.PORT}")
except ConnectionError: except ConnectionError:
logger.error("连接到 Elasticsearch 失败!") logger.error("连接到 Elasticsearch 失败!")
raise ConnectionError raise ConnectionError
@ -39,8 +40,12 @@ class ESKBService(KBService):
try: try:
# 首先尝试通过es_client_python创建 # 首先尝试通过es_client_python创建
mappings = { mappings = {
"dense_vector": { "properties": {
"type": "dense_vector" "dense_vector": {
"type": "dense_vector",
"dims": self.dims_length,
"index": True
}
} }
} }
self.es_client_python.indices.create(index=self.index_name, mappings=mappings) self.es_client_python.indices.create(index=self.index_name, mappings=mappings)
@ -52,7 +57,7 @@ class ESKBService(KBService):
# langchain ES 连接、创建索引 # langchain ES 连接、创建索引
if self.user != "" and self.password != "": if self.user != "" and self.password != "":
self.db_init = ElasticsearchStore( self.db_init = ElasticsearchStore(
es_url=f"https://{self.IP}:{self.PORT}", es_url=f"http://{self.IP}:{self.PORT}",
index_name=self.index_name, index_name=self.index_name,
query_field="context", query_field="context",
vector_query_field="dense_vector", vector_query_field="dense_vector",
@ -63,7 +68,7 @@ class ESKBService(KBService):
else: else:
logger.warning("ES未配置用户名和密码") logger.warning("ES未配置用户名和密码")
self.db_init = ElasticsearchStore( self.db_init = ElasticsearchStore(
es_url=f"https://{self.IP}:{self.PORT}", es_url=f"http://{self.IP}:{self.PORT}",
index_name=self.index_name, index_name=self.index_name,
query_field="context", query_field="context",
vector_query_field="dense_vector", vector_query_field="dense_vector",
@ -115,7 +120,7 @@ class ESKBService(KBService):
self.db = ElasticsearchStore.from_documents( self.db = ElasticsearchStore.from_documents(
documents=docs, documents=docs,
embedding=embed_model, embedding=embed_model,
es_url= f"https://{self.IP}:{self.PORT}", es_url= f"http://{self.IP}:{self.PORT}",
index_name=self.index_name, index_name=self.index_name,
distance_strategy="COSINE", distance_strategy="COSINE",
query_field="context", query_field="context",
@ -128,7 +133,7 @@ class ESKBService(KBService):
self.db = ElasticsearchStore.from_documents( self.db = ElasticsearchStore.from_documents(
documents=docs, documents=docs,
embedding=embed_model, embedding=embed_model,
es_url= f"https://{self.IP}:{self.PORT}", es_url= f"http://{self.IP}:{self.PORT}",
index_name=self.index_name, index_name=self.index_name,
distance_strategy="COSINE", distance_strategy="COSINE",
query_field="context", query_field="context",
@ -225,7 +230,12 @@ class ESKBService(KBService):
shutil.rmtree(self.kb_path) shutil.rmtree(self.kb_path)
if __name__ == '__main__':
esKBService = ESKBService("test")
#esKBService.clear_vs()
#esKBService.create_kb()
esKBService.add_doc(KnowledgeFile(filename="README.md", knowledge_base_name="test"))
print(esKBService.search_docs("如何启动api服务"))