Merge pull request #2949 from criwits/dev

增加对 `.htm` 扩展名的显式支持，以解决 `.htm` 格式网页可被上传但不被入库的问题
This commit is contained in:
zR 2024-02-07 00:22:53 +08:00 committed by GitHub
commit 041e964b30
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -85,7 +85,7 @@ def list_files_from_folder(kb_name: str):
return result
-LOADER_DICT = {"UnstructuredHTMLLoader": ['.html'],
+LOADER_DICT = {"UnstructuredHTMLLoader": ['.html', '.htm'],
"MHTMLLoader": ['.mhtml'],
"UnstructuredMarkdownLoader": ['.md'],
"JSONLoader": [".json"],