update pdf_loader.py

This commit is contained in:
imClumsyPanda 2023-05-31 18:03:37 +08:00
parent 99ee2e9fd8
commit 5c0c1eed93

View File

@@ -39,6 +39,7 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
result = ocr.ocr(img_name) result = ocr.ocr(img_name)
ocr_result = [i[1][0] for line in result for i in line] ocr_result = [i[1][0] for line in result for i in line]
fout.write("\n".join(ocr_result)) fout.write("\n".join(ocr_result))
if os.path.exists(img_name):
os.remove(img_name) os.remove(img_name)
return txt_file_path return txt_file_path