update pdf_loader.py

This commit is contained in:
imClumsyPanda 2023-05-31 18:03:37 +08:00
parent 99ee2e9fd8
commit 5c0c1eed93

View File

@@ -39,7 +39,8 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
result = ocr.ocr(img_name)
ocr_result = [i[1][0] for line in result for i in line]
fout.write("\n".join(ocr_result))
-os.remove(img_name)
+if os.path.exists(img_name):
+os.remove(img_name)
return txt_file_path
txt_file_path = pdf_ocr_txt(self.file_path)