From b808010417f8e8b91bdf5545ae6383eb082c2475 Mon Sep 17 00:00:00 2001 From: root <403644786@qq.com> Date: Fri, 21 Jun 2024 16:19:01 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E5=89=8D=E5=8A=A0=E4=BA=86?= =?UTF-8?q?=E7=A9=BA=E6=A0=BC=EF=BC=8C=E4=BF=9D=E6=8C=81=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E8=A7=84=E8=8C=83=E6=80=A7=EF=BC=8C=E5=88=98=E4=B8=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- finetune/finetune.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/finetune/finetune.py b/finetune/finetune.py index 3422d35..57ceab4 100644 --- a/finetune/finetune.py +++ b/finetune/finetune.py @@ -55,8 +55,8 @@ class SupervisedDataset(Dataset): self.data = json.load(open(data_path)) self.tokenizer = tokenizer self.model_max_length = model_max_length - self.user_tokens = self.tokenizer.encode(user_tokens)#针对不同模型,都可以对应到<用户>的id - self.assistant_tokens = self.tokenizer.encode(assistant_tokens)#针对不同模型,都可以对应到的id + self.user_tokens = self.tokenizer.encode(user_tokens) #针对不同模型,都可以对应到<用户>的id + self.assistant_tokens = self.tokenizer.encode(assistant_tokens) #针对不同模型,都可以对应到的id self.ignore_index = -100 item = self.preprocessing(self.data[0]) print("input:", self.tokenizer.decode(item["input_ids"]))