From 5c0c1eed9313e9dbd48ce540c39e2ed09dc59610 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Wed, 31 May 2023 18:03:37 +0800 Subject: [PATCH] update pdf_loader.py --- loader/pdf_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loader/pdf_loader.py b/loader/pdf_loader.py index 8f55a942..e43169d2 100644 --- a/loader/pdf_loader.py +++ b/loader/pdf_loader.py @@ -39,7 +39,8 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader): result = ocr.ocr(img_name) ocr_result = [i[1][0] for line in result for i in line] fout.write("\n".join(ocr_result)) - os.remove(img_name) + if os.path.exists(img_name): + os.remove(img_name) return txt_file_path txt_file_path = pdf_ocr_txt(self.file_path)