mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-01-30 02:35:29 +08:00
Fix(File RAG): use jieba instead of cutword
This commit is contained in:
parent
d71c9b0a27
commit
1987063a76
@@ -30,12 +30,13 @@ class EnsembleRetrieverService(BaseRetrieverService):
|
||||
}
|
||||
)
|
||||
# TODO: 换个不用torch的实现方式
|
||||
from cutword.cutword import Cutter
|
||||
cutter = Cutter()
|
||||
# from cutword.cutword import Cutter
|
||||
import jieba
|
||||
# cutter = Cutter()
|
||||
docs = list(vectorstore.docstore._dict.values())
|
||||
bm25_retriever = BM25Retriever.from_documents(
|
||||
docs,
|
||||
preprocess_func=cutter.cutword
|
||||
preprocess_func=jieba.lcut_for_search,
|
||||
)
|
||||
bm25_retriever.k = top_k
|
||||
ensemble_retriever = EnsembleRetriever(
|
||||
|
||||
@@ -29,7 +29,8 @@ unstructured = "~0.11.0"
|
||||
python-magic-bin = {version = "*", platform = "win32"}
|
||||
SQLAlchemy = "~2.0.25"
|
||||
faiss-cpu = "~1.7.4"
|
||||
cutword = "0.1.0"
|
||||
#cutword = "0.1.0"
|
||||
jieba = "0.42.1"
|
||||
rank_bm25 = "0.2.2"
|
||||
# accelerate = "~0.24.1"
|
||||
# spacy = "~3.7.2"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user