Coconut/coconut.py — 40 changes (32 additions, 8 deletions)
@@ -214,6 +214,8 @@ def generate(

         tokens = input_ids[0].detach().tolist()
 
+        past_key_values = None
+
         labels = input_ids.clone()  # placeholder. not used.
         outputs = self.forward(
             input_ids,
@@ -224,39 +226,61 @@ def generate(
             ).reshape(1, -1),
         )
         inputs_embeds = outputs.inputs_embeds
 
+        pref_out = self.base_causallm(
+            inputs_embeds=inputs_embeds,
+            use_cache=True
+        )
+
+        self.gen_forward_cnt += 1
+
+        past_key_values = pref_out.past_key_values
+
         # get the first token using the current hidden state
-        next_token = torch.argmax(outputs.logits[0, -1]).item()
+        next_token = torch.argmax(pref_out.logits[0, -1]).item()
         tokens.append(next_token)
         new_token_embed = self.embedding(
-            torch.tensor(next_token, device=input_ids.device)
+            torch.tensor([next_token], device=input_ids.device)
         ).view(1, 1, -1)
-        new_inputs_embeds = torch.cat((inputs_embeds, new_token_embed), dim=1)
+        full_inputs_embeds = torch.cat((inputs_embeds, new_token_embed), dim=1)
 
+
         # get other tokens
         for _ in range(max_new_tokens - 1):
-            outputs = self.base_causallm(inputs_embeds=new_inputs_embeds)
+            outputs = self.base_causallm(
+                inputs_embeds=new_token_embed,
+                past_key_values=past_key_values,
+                use_cache=True
+            )
+
+            past_key_values = outputs.past_key_values
+
             self.gen_forward_cnt += 1
             next_token = torch.argmax(outputs.logits[0, -1]).item()
             if next_token == self.eos_token_id:
                 break
             tokens.append(next_token)
             new_token_embed = self.embedding(
-                torch.tensor(next_token, device=input_ids.device)
+                torch.tensor([next_token], device=input_ids.device)
            ).view(1, 1, -1)
-            new_inputs_embeds = torch.cat((new_inputs_embeds, new_token_embed), dim=1)
+            full_inputs_embeds = torch.cat((full_inputs_embeds, new_token_embed), dim=1)
 
         if synced_gpus:
             # in FSDP, the number of forward pass need to be the same across devices
             while (
                 self.gen_forward_cnt < max_new_tokens + MAX_N_LATENT
             ): # leave some room for latent tokens
                 self.gen_forward_cnt += 1
-                _ = self.base_causallm(inputs_embeds=new_inputs_embeds)
+                _ = self.base_causallm(
+                    inputs_embeds=new_token_embed,
+                    past_key_values=past_key_values,
+                    use_cache=True
+                )
+                past_key_values = _.past_key_values
 
         if output_embedding:
             # for analysis purpose
-            return torch.tensor(tokens).view(1, -1), new_inputs_embeds
+            return torch.tensor(tokens).view(1, -1), full_inputs_embeds
 
         else:
             return torch.tensor(tokens).view(1, -1)
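
The net effect of this diff is that generate() now decodes incrementally with a key/value cache: the prompt embeddings (including any latent thoughts already spliced in by self.forward) are prefilled once with use_cache=True, and every later step feeds only the newest token's embedding together with past_key_values, instead of re-running the whole growing sequence through the model. full_inputs_embeds is kept only so output_embedding can still return the complete embedding sequence for analysis, and the same single-token-plus-cache call is reused in the synced_gpus padding loop so the dummy forward passes required under FSDP stay cheap. Below is a minimal, self-contained sketch of the same pattern using a plain Hugging Face GPT-2 rather than the Coconut wrapper; the model name, prompt, token budget, and greedy argmax loop are illustrative assumptions, not part of this PR.

# Sketch of prefill-then-incremental decoding with a KV cache (assumed setup:
# plain GPT-2 via transformers; the Coconut-specific latent-thought handling is omitted).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
model.eval()

input_ids = tokenizer("2 + 2 =", return_tensors="pt").input_ids
inputs_embeds = model.get_input_embeddings()(input_ids)

with torch.no_grad():
    # Prefill: run the full prompt once and keep its key/value cache.
    out = model(inputs_embeds=inputs_embeds, use_cache=True)
    past_key_values = out.past_key_values
    next_token = torch.argmax(out.logits[0, -1]).item()
    tokens = input_ids[0].tolist() + [next_token]

    for _ in range(15):
        # Decode step: feed only the newest token's embedding plus the cache,
        # instead of re-running the whole (growing) sequence each iteration.
        new_embed = model.get_input_embeddings()(
            torch.tensor([[next_token]], device=input_ids.device)
        )
        out = model(
            inputs_embeds=new_embed,
            past_key_values=past_key_values,
            use_cache=True,
        )
        past_key_values = out.past_key_values
        next_token = torch.argmax(out.logits[0, -1]).item()
        if next_token == tokenizer.eos_token_id:
            break
        tokens.append(next_token)

print(tokenizer.decode(tokens))

With the cache, each decoding step runs attention over one new position against stored keys/values rather than recomputing the entire prefix, which is what makes per-step cost roughly constant instead of growing with the generated length.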