diff --git a/loader/pdf_loader.py b/loader/pdf_loader.py index 8f55a942..e43169d2 100644 --- a/loader/pdf_loader.py +++ b/loader/pdf_loader.py @@ -39,7 +39,8 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader): result = ocr.ocr(img_name) ocr_result = [i[1][0] for line in result for i in line] fout.write("\n".join(ocr_result)) - os.remove(img_name) + if os.path.exists(img_name): + os.remove(img_name) return txt_file_path txt_file_path = pdf_ocr_txt(self.file_path)