Dev fix csv loader (#3404)

* fix: csv loader only load one column

* csv data merge with col name
This commit is contained in:
BlizBearer 2024-03-20 08:40:22 +08:00 committed by GitHub
parent d78b14721c
commit 07b7c966ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -59,23 +59,26 @@ class FilteredCSVLoader(CSVLoader):
docs = []
csv_reader = csv.DictReader(csvfile, **self.csv_args) # type: ignore
for i, row in enumerate(csv_reader):
if self.columns_to_read[0] in row:
content = row[self.columns_to_read[0]]
# Extract the source if available
source = (
row.get(self.source_column, None)
if self.source_column is not None
else self.file_path
)
metadata = {"source": source, "row": i}
content = []
for col in self.columns_to_read:
if col in row:
content.append(f'{col}:{str(row[col])}')
else:
raise ValueError(f"Column '{self.columns_to_read[0]}' not found in CSV file.")
content = '\n'.join(content)
# Extract the source if available
source = (
row.get(self.source_column, None)
if self.source_column is not None
else self.file_path
)
metadata = {"source": source, "row": i}
for col in self.metadata_columns:
if col in row:
metadata[col] = row[col]
for col in self.metadata_columns:
if col in row:
metadata[col] = row[col]
doc = Document(page_content=content, metadata=metadata)
docs.append(doc)
else:
raise ValueError(f"Column '{self.columns_to_read[0]}' not found in CSV file.")
doc = Document(page_content=content, metadata=metadata)
docs.append(doc)
return docs