From 062ea5264a770c61fdea34063a569b556cdc3df1 Mon Sep 17 00:00:00 2001 From: root <403644786@qq.com> Date: Fri, 21 Jun 2024 16:08:39 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BD=BF=E7=94=A8encode=E6=96=B9=E6=B3=95?= =?UTF-8?q?=EF=BC=8C=E4=BB=A3=E7=A0=81=E5=8F=AF=E8=AF=BB=E6=80=A7=E6=9B=B4?= =?UTF-8?q?=E5=BC=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- finetune/finetune.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/finetune/finetune.py b/finetune/finetune.py index 0fe24fd..3422d35 100644 --- a/finetune/finetune.py +++ b/finetune/finetune.py @@ -55,8 +55,8 @@ class SupervisedDataset(Dataset): self.data = json.load(open(data_path)) self.tokenizer = tokenizer self.model_max_length = model_max_length - self.user_tokens = self.tokenizer(user_tokens)['input_ids']#针对不同模型,都可以对应到<用户>的id - self.assistant_tokens = self.tokenizer(assistant_tokens)['input_ids']#针对不同模型,都可以对应到<AI>的id + self.user_tokens = self.tokenizer.encode(user_tokens)#针对不同模型,都可以对应到<用户>的id + self.assistant_tokens = self.tokenizer.encode(assistant_tokens)#针对不同模型,都可以对应到<AI>的id self.ignore_index = -100 item = self.preprocessing(self.data[0]) print("input:", self.tokenizer.decode(item["input_ids"]))