mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-02-06 06:49:48 +08:00
update pdf_loader.py
This commit is contained in:
parent
94b4599cda
commit
3712eec6a9
@ -29,7 +29,8 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
|
|||||||
img_list = page.get_images()
|
img_list = page.get_images()
|
||||||
for img in img_list:
|
for img in img_list:
|
||||||
pix = fitz.Pixmap(doc, img[0])
|
pix = fitz.Pixmap(doc, img[0])
|
||||||
|
if pix.n - pix.alpha >= 4:
|
||||||
|
pix = fitz.Pixmap(fitz.csRGB, pix)
|
||||||
pix.save(img_name)
|
pix.save(img_name)
|
||||||
|
|
||||||
result = ocr.ocr(img_name)
|
result = ocr.ocr(img_name)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user