From f95d41ef471707ddc0f0a2430c1cbc9faf2dfdcc Mon Sep 17 00:00:00 2001
From: imClumsyPanda <littlepanda0716@gmail.com>
Date: Sat, 26 Aug 2023 11:45:01 +0800
Subject: [PATCH] =?UTF-8?q?[BUG]=20=E4=BF=AE=E5=A4=8Dcsv=E6=96=87=E4=BB=B6?=
 =?UTF-8?q?=E8=AF=BB=E5=8F=96=E5=90=8E=EF=BC=8C=E5=8D=95=E8=A1=8C=E6=95=B0?=
 =?UTF-8?q?=E6=8D=AE=E8=A2=AB=E5=88=86=E6=88=90=E5=A4=9A=E6=AE=B5=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 server/knowledge_base/utils.py | 43 ++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py
index da530495..34f20832 100644
--- a/server/knowledge_base/utils.py
+++ b/server/knowledge_base/utils.py
@@ -104,32 +104,35 @@ class KnowledgeFile:
         else:
             loader = DocumentLoader(self.filepath)
 
-        try:
-            if self.text_splitter_name is None:
+        if self.ext in ".csv":
+            docs = loader.load()
+        else:
+            try:
+                if self.text_splitter_name is None:
+                    text_splitter_module = importlib.import_module('langchain.text_splitter')
+                    TextSplitter = getattr(text_splitter_module, "SpacyTextSplitter")
+                    text_splitter = TextSplitter(
+                        pipeline="zh_core_web_sm",
+                        chunk_size=CHUNK_SIZE,
+                        chunk_overlap=OVERLAP_SIZE,
+                    )
+                    self.text_splitter_name = "SpacyTextSplitter"
+                else:
+                    text_splitter_module = importlib.import_module('langchain.text_splitter')
+                    TextSplitter = getattr(text_splitter_module, self.text_splitter_name)
+                    text_splitter = TextSplitter(
+                        chunk_size=CHUNK_SIZE,
+                        chunk_overlap=OVERLAP_SIZE)
+            except Exception as e:
+                print(e)
                 text_splitter_module = importlib.import_module('langchain.text_splitter')
-                TextSplitter = getattr(text_splitter_module, "SpacyTextSplitter")
+                TextSplitter = getattr(text_splitter_module, "RecursiveCharacterTextSplitter")
                 text_splitter = TextSplitter(
-                    pipeline="zh_core_web_sm",
                     chunk_size=CHUNK_SIZE,
                     chunk_overlap=OVERLAP_SIZE,
                 )
-                self.text_splitter_name = "SpacyTextSplitter"
-            else:
-                text_splitter_module = importlib.import_module('langchain.text_splitter')
-                TextSplitter = getattr(text_splitter_module, self.text_splitter_name)
-                text_splitter = TextSplitter(
-                    chunk_size=CHUNK_SIZE,
-                    chunk_overlap=OVERLAP_SIZE)
-        except Exception as e:
-            print(e)
-            text_splitter_module = importlib.import_module('langchain.text_splitter')
-            TextSplitter = getattr(text_splitter_module, "RecursiveCharacterTextSplitter")
-            text_splitter = TextSplitter(
-                chunk_size=CHUNK_SIZE,
-                chunk_overlap=OVERLAP_SIZE,
-            )
 
-        docs = loader.load_and_split(text_splitter)
+            docs = loader.load_and_split(text_splitter)
         print(docs[0])
         if using_zh_title_enhance:
             docs = zh_title_enhance(docs)