mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-02-07 07:23:29 +08:00
update pdf_loader.py
This commit is contained in:
parent
99ee2e9fd8
commit
5c0c1eed93
@ -39,7 +39,8 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
|
|||||||
result = ocr.ocr(img_name)
|
result = ocr.ocr(img_name)
|
||||||
ocr_result = [i[1][0] for line in result for i in line]
|
ocr_result = [i[1][0] for line in result for i in line]
|
||||||
fout.write("\n".join(ocr_result))
|
fout.write("\n".join(ocr_result))
|
||||||
os.remove(img_name)
|
if os.path.exists(img_name):
|
||||||
|
os.remove(img_name)
|
||||||
return txt_file_path
|
return txt_file_path
|
||||||
|
|
||||||
txt_file_path = pdf_ocr_txt(self.file_path)
|
txt_file_path = pdf_ocr_txt(self.file_path)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user