Mirror of https://github.com/RYDE-WORK/ktransformers.git
Fix: the tokens returned by prefill_and_generate
commit ee72cee050
parent be81269e38
@@ -174,7 +174,7 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud
     prefill_time = first_token_time
     print(stream.put(next_token.item()), end="", flush=True)
     generated_ids[:, seq_length] = next_token
-    tokens.append(next_token)
+    tokens.append(int(next_token))
     inputs = torch.cat((inputs, next_token.unsqueeze(0)), dim=-1)
     cache_position = torch.tensor([seq_length], device=torch_device)
     position_ids = cache_position.unsqueeze(0)
@@ -191,7 +191,7 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud
         next_token = decode_one_tokens(cuda_graph_runner, next_token.unsqueeze(0), position_ids, cache_position, past_key_values, use_cuda_graph).to(torch_device)
         inputs = torch.cat((inputs, next_token.unsqueeze(0)), dim=-1)
         generated_ids[:, cache_position] = next_token.int()
-        tokens.append(next_token.int())
+        tokens.append(int(next_token))
         seq_length += 1

         if next_token[0].item() == tokenizer.eos_token_id or tokenizer.decode(next_token) == '<|im_end|>':
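For context, the practical effect of the change is that prefill_and_generate now collects plain Python ints instead of tensors. A minimal sketch of the difference, assuming next_token is a 1-element tensor as in the diff (the snippet is illustrative, not the repository code):

import torch

tokens = []
next_token = torch.tensor([42])   # a 1-element tensor produced by sampling/argmax

# Before this commit: the list held tensors (first hunk) or int32 tensors
# (second hunk), so callers received a list of torch.Tensor objects.
tokens.append(next_token)         # tokens == [tensor([42])]

# After this commit: int() on a 1-element tensor extracts the scalar, so the
# returned list contains plain token ids.
tokens = []
tokens.append(int(next_token))    # tokens == [42]

Presumably the motivation is that downstream consumers of the returned list expect plain token ids (e.g. for tokenizer.decode); that is an inference from the diff, not something stated in the commit message.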