mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-02-06 23:15:53 +08:00
update pdf read strategy
This commit is contained in:
parent
2987c9cd52
commit
5852b4c62e
@@ -22,7 +22,7 @@ def load_file(filepath, sentence_size=SENTENCE_SIZE):
|
|||||||
loader = UnstructuredFileLoader(filepath, mode="elements")
|
loader = UnstructuredFileLoader(filepath, mode="elements")
|
||||||
docs = loader.load()
|
docs = loader.load()
|
||||||
elif filepath.lower().endswith(".pdf"):
|
elif filepath.lower().endswith(".pdf"):
|
||||||
loader = UnstructuredFileLoader(filepath)
|
loader = UnstructuredFileLoader(filepath, strategy="fast")
|
||||||
textsplitter = ChineseTextSplitter(pdf=True, sentence_size=sentence_size)
|
textsplitter = ChineseTextSplitter(pdf=True, sentence_size=sentence_size)
|
||||||
docs = loader.load_and_split(textsplitter)
|
docs = loader.load_and_split(textsplitter)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user