From 74ecbcce5e1fc3877af2fdbdcfd4a70b76587721 Mon Sep 17 00:00:00 2001 From: Xiang Long Date: Wed, 6 Mar 2024 17:25:41 +0800 Subject: [PATCH] Fix sft_dataset issue and naming error --- finetune/finetune.py | 9 ++++----- finetune/lora_finetune.ipynb | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/finetune/finetune.py b/finetune/finetune.py index 9b0ce1b..1bf3d4e 100644 --- a/finetune/finetune.py +++ b/finetune/finetune.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- import json -from typing import Dict, Optional from dataclasses import dataclass, field +from typing import Dict, Optional import torch -from torch.utils.data import Dataset - import transformers -from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer +from torch.utils.data import Dataset +from transformers import (AutoModelForCausalLM, AutoTokenizer, Trainer, + TrainingArguments) @dataclass @@ -90,7 +90,6 @@ class SupervisedDataset(Dataset): label_ids += ( [self.ignore_index] * len(self.assistant_tokens) + content_ids - + [self.tokenizer.eos_token_id] ) input_ids = input_ids[: self.model_max_length] diff --git a/finetune/lora_finetune.ipynb b/finetune/lora_finetune.ipynb index 6cf5b41..35328be 100644 --- a/finetune/lora_finetune.ipynb +++ b/finetune/lora_finetune.ipynb @@ -101,7 +101,7 @@ "metadata": {}, "outputs": [], "source": [ - "!bash lora_finetune_ds.sh" + "!bash lora_finetune.sh" ] } ],