更新了chromadb的打印的符号

This commit is contained in:
zR 2024-01-30 14:18:14 +08:00
parent ffbfcd41f2
commit 042a70c09a
6 changed files with 15 additions and 11 deletions

View File

@ -3,7 +3,7 @@ import os
# 默认使用的知识库
DEFAULT_KNOWLEDGE_BASE = "samples"
# 默认向量库/全文检索引擎类型。可选faiss, milvus(离线) & zilliz(在线), pgvector, 全文检索引擎es, chromadb
# 默认向量库/全文检索引擎类型。可选faiss, milvus(离线) & zilliz(在线), pgvector, chromadb, 全文检索引擎es
DEFAULT_VS_TYPE = "faiss"
# 缓存向量库数量针对FAISS

View File

@ -44,10 +44,11 @@ llama-index==0.9.35
# dashscope==1.13.6 # qwen
# volcengine==1.0.119 # fangzhou
# uncomment libs if you want to use corresponding vector store
# pymilvus==2.3.4
# pymilvus==2.3.6
# psycopg2==2.9.9
# pgvector==0.2.4
# pgvector>=0.2.4
# chromadb==0.4.13
#flash-attn==2.4.2 # For Orion-14B-Chat and Qwen-14B-Chat
#autoawq==0.1.8 # For Int4
#rapidocr_paddle[gpu]==1.3.11 # gpu acceleration for ocr of pdf and image files

View File

@ -50,9 +50,11 @@ pyjwt==2.8.0
# metaphor-python~=0.1.23
# volcengine>=1.0.119
# pymilvus>=2.3.4
# pymilvus==2.3.6
# psycopg2==2.9.9
# pgvector>=0.2.4
# chromadb==0.4.13
#flash-attn==2.4.2 # For Orion-14B-Chat and Qwen-14B-Chat
#autoawq==0.1.8 # For Int4
#rapidocr_paddle[gpu]==1.3.11 # gpu acceleration for ocr of pdf and image files

View File

@ -33,6 +33,7 @@ watchdog~=3.0.0
# psycopg2==2.9.9
# pgvector>=0.2.4
# chromadb==0.4.13
# jq==1.6.0
# beautifulsoup4~=4.12.2
# pysrt~=1.1.2

View File

@ -23,13 +23,13 @@ def _get_result_to_documents(get_result: GetResult) -> List[Document]:
return document_list
def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
"""
from langchain_community.vectorstores.chroma import Chroma
"""
return [
# TODO: Chroma can do batch querying,
# we shouldn't hard code to the 1st result
(Document(page_content=result[0], metadata=result[1] or {}), result[2])
for result in zip(
results["documents"][0],
@ -40,14 +40,12 @@ def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
class ChromaKBService(KBService):
vs_path: str
kb_path: str
client = None
collection = None
def vs_type(self) -> str:
return SupportedVSType.CHROMADB
@ -75,7 +73,8 @@ class ChromaKBService(KBService):
if not str(e) == f"Collection {self.kb_name} does not exist.":
raise e
def do_search(self, query: str, top_k: int, score_threshold: float = SCORE_THRESHOLD) -> List[Tuple[Document, float]]:
def do_search(self, query: str, top_k: int, score_threshold: float = SCORE_THRESHOLD) -> List[
Tuple[Document, float]]:
embed_func = EmbeddingsFunAdapter(self.embed_model)
embeddings = embed_func.embed_query(query)
query_result: QueryResult = self.collection.query(query_embeddings=embeddings, n_results=top_k)
@ -84,7 +83,6 @@ class ChromaKBService(KBService):
def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
doc_infos = []
data = self._docs_to_embeddings(docs)
print(data)
ids = [str(uuid.uuid1()) for _ in range(len(data["texts"]))]
for _id, text, embedding, metadata in zip(ids, data["texts"], data["embeddings"], data["metadatas"]):
self.collection.add(ids=_id, embeddings=embedding, metadatas=metadata, documents=text)

View File

@ -2,6 +2,7 @@ from typing import List, Dict, Optional
from langchain.schema import Document
from langchain.vectorstores.milvus import Milvus
import os
from configs import kbs_config
@ -85,6 +86,7 @@ class MilvusKBService(KBService):
def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
if self.milvus.col:
filepath = kb_file.filepath.replace('\\', '\\\\')
filename = os.path.basename(filepath)
delete_list = [item.get("pk") for item in
self.milvus.col.query(expr=f'source == "{filepath}"', output_fields=["pk"])]
self.milvus.col.delete(expr=f'pk in {delete_list}')