mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-01-19 13:23:16 +08:00
更新了chromadb的打印的符号
This commit is contained in:
parent
ffbfcd41f2
commit
042a70c09a
@ -3,7 +3,7 @@ import os
|
||||
# 默认使用的知识库
|
||||
DEFAULT_KNOWLEDGE_BASE = "samples"
|
||||
|
||||
# 默认向量库/全文检索引擎类型。可选:faiss, milvus(离线) & zilliz(在线), pgvector, 全文检索引擎es, chromadb
|
||||
# 默认向量库/全文检索引擎类型。可选:faiss, milvus(离线) & zilliz(在线), pgvector, chromadb 全文检索引擎es
|
||||
DEFAULT_VS_TYPE = "faiss"
|
||||
|
||||
# 缓存向量库数量(针对FAISS)
|
||||
|
||||
@ -44,10 +44,11 @@ llama-index==0.9.35
|
||||
# dashscope==1.13.6 # qwen
|
||||
# volcengine==1.0.119 # fangzhou
|
||||
# uncomment libs if you want to use corresponding vector store
|
||||
# pymilvus==2.3.4
|
||||
# pymilvus==2.3.6
|
||||
# psycopg2==2.9.9
|
||||
# pgvector==0.2.4
|
||||
# pgvector>=0.2.4
|
||||
# chromadb==0.4.13
|
||||
|
||||
#flash-attn==2.4.2 # For Orion-14B-Chat and Qwen-14B-Chat
|
||||
#autoawq==0.1.8 # For Int4
|
||||
#rapidocr_paddle[gpu]==1.3.11 # gpu acceleration for ocr of pdf and image files
|
||||
|
||||
@ -50,9 +50,11 @@ pyjwt==2.8.0
|
||||
# metaphor-python~=0.1.23
|
||||
|
||||
# volcengine>=1.0.119
|
||||
# pymilvus>=2.3.4
|
||||
# pymilvus==2.3.6
|
||||
# psycopg2==2.9.9
|
||||
# pgvector>=0.2.4
|
||||
# chromadb==0.4.13
|
||||
|
||||
#flash-attn==2.4.2 # For Orion-14B-Chat and Qwen-14B-Chat
|
||||
#autoawq==0.1.8 # For Int4
|
||||
#rapidocr_paddle[gpu]==1.3.11 # gpu acceleration for ocr of pdf and image files
|
||||
@ -33,6 +33,7 @@ watchdog~=3.0.0
|
||||
# psycopg2==2.9.9
|
||||
# pgvector>=0.2.4
|
||||
# chromadb==0.4.13
|
||||
|
||||
# jq==1.6.0
|
||||
# beautifulsoup4~=4.12.2
|
||||
# pysrt~=1.1.2
|
||||
|
||||
@ -15,7 +15,7 @@ def _get_result_to_documents(get_result: GetResult) -> List[Document]:
|
||||
if not get_result['documents']:
|
||||
return []
|
||||
|
||||
_metadatas = get_result['metadatas'] if get_result['metadatas'] else [{}] * len(get_result['documents'])
|
||||
_metadatas = get_result['metadatas'] if get_result['metadatas'] else [{}] * len(get_result['documents'])
|
||||
|
||||
document_list = []
|
||||
for page_content, metadata in zip(get_result['documents'], _metadatas):
|
||||
@ -23,13 +23,13 @@ def _get_result_to_documents(get_result: GetResult) -> List[Document]:
|
||||
|
||||
return document_list
|
||||
|
||||
|
||||
def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
|
||||
"""
|
||||
from langchain_community.vectorstores.chroma import Chroma
|
||||
"""
|
||||
return [
|
||||
# TODO: Chroma can do batch querying,
|
||||
# we shouldn't hard code to the 1st result
|
||||
(Document(page_content=result[0], metadata=result[1] or {}), result[2])
|
||||
for result in zip(
|
||||
results["documents"][0],
|
||||
@ -40,14 +40,12 @@ def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
|
||||
|
||||
|
||||
class ChromaKBService(KBService):
|
||||
|
||||
vs_path: str
|
||||
kb_path: str
|
||||
|
||||
client = None
|
||||
collection = None
|
||||
|
||||
|
||||
def vs_type(self) -> str:
|
||||
return SupportedVSType.CHROMADB
|
||||
|
||||
@ -75,16 +73,16 @@ class ChromaKBService(KBService):
|
||||
if not str(e) == f"Collection {self.kb_name} does not exist.":
|
||||
raise e
|
||||
|
||||
def do_search(self, query: str, top_k: int, score_threshold: float = SCORE_THRESHOLD) -> List[Tuple[Document, float]]:
|
||||
def do_search(self, query: str, top_k: int, score_threshold: float = SCORE_THRESHOLD) -> List[
|
||||
Tuple[Document, float]]:
|
||||
embed_func = EmbeddingsFunAdapter(self.embed_model)
|
||||
embeddings = embed_func.embed_query(query)
|
||||
query_result: QueryResult = self.collection.query(query_embeddings=embeddings, n_results=top_k)
|
||||
return _results_to_docs_and_scores(query_result)
|
||||
return _results_to_docs_and_scores(query_result)
|
||||
|
||||
def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
|
||||
doc_infos = []
|
||||
data = self._docs_to_embeddings(docs)
|
||||
print(data)
|
||||
ids = [str(uuid.uuid1()) for _ in range(len(data["texts"]))]
|
||||
for _id, text, embedding, metadata in zip(ids, data["texts"], data["embeddings"], data["metadatas"]):
|
||||
self.collection.add(ids=_id, embeddings=embedding, metadatas=metadata, documents=text)
|
||||
|
||||
@ -2,6 +2,7 @@ from typing import List, Dict, Optional
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores.milvus import Milvus
|
||||
import os
|
||||
|
||||
from configs import kbs_config
|
||||
|
||||
@ -85,6 +86,7 @@ class MilvusKBService(KBService):
|
||||
def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
|
||||
if self.milvus.col:
|
||||
filepath = kb_file.filepath.replace('\\', '\\\\')
|
||||
filename = os.path.basename(filepath)
|
||||
delete_list = [item.get("pk") for item in
|
||||
self.milvus.col.query(expr=f'source == "{filepath}"', output_fields=["pk"])]
|
||||
self.milvus.col.delete(expr=f'pk in {delete_list}')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user