
Commit

address #8
lucidrains committed Dec 2, 2024
1 parent ec10698 commit 29068b5
Showing 2 changed files with 25 additions and 55 deletions.
78 changes: 24 additions & 54 deletions pi_zero_pytorch/pi_zero.py
@@ -70,13 +70,8 @@

def create_pizero_attn_mask(
prefix_causal_length,
mask: Bool['b n'],
internal_state_offset_and_len: tuple[int, int] | None = None
mask: Bool['b n']
):

state_offset, state_len = default(internal_state_offset_and_len, (0, 0))
state_left, state_right = state_offset, state_offset + state_len

# the pi-zero attention is a triangular causal mask, but bidirectional attention for the actions at the very right hand side

def mask_fn(batch_index, head_index, query_index, key_index):
@@ -88,12 +83,7 @@ def mask_fn(batch_index, head_index, query_index, key_index):
query_index >= prefix_causal_length
)

bidirectional_internal_state_mask = (
state_left <= key_index and key_index < state_right and
state_left <= query_index and query_index < state_right
)

return (key_mask and causal_mask) or bidirectional_action_mask or bidirectional_internal_state_mask
return (key_mask and causal_mask) or bidirectional_action_mask

return mask_fn
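
For reference, a minimal sketch (not part of the commit) of the pattern the simplified mask_fn yields: prefix tokens attend causally to non-padded keys, while everything from prefix_causal_length onward, i.e. the action-side tokens, attends bidirectionally among itself. This mirrors the scalar-index form above so it can presumably be handed to flex attention as a mask_mod; the toy prefix_len, action_len and key_padding values below are assumptions for illustration.

# illustrative only -- densify the mask_fn logic above for a toy sequence
import torch

prefix_len, action_len = 4, 3            # assumed toy sizes
seq_len = prefix_len + action_len
key_padding = [True] * seq_len           # True = real token, False = padding

allowed = torch.zeros(seq_len, seq_len, dtype = torch.bool)

for q in range(seq_len):
    for k in range(seq_len):
        causal = q >= k
        bidirectional_action = k >= prefix_len and q >= prefix_len
        allowed[q, k] = (key_padding[k] and causal) or bidirectional_action

print(allowed.int())   # causal lower triangle, with an all-ones block for the action tokens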

@@ -359,7 +349,6 @@ def forward(
actions,
rotary_emb = None,
mask: Bool['b n'] | None = None,
internal_state_offset_and_len: tuple[int, int] | None = None,
actions_value_residual: Tensor | None = None,
return_keys_values = False,
flex_attn_fn: Callable | None = None
@@ -408,11 +397,6 @@ def forward(

causal_mask[..., seq_len:, seq_len:] = False # actions have bidirectional attention, lining up with Transfusion paper

if exists(internal_state_offset_and_len):
offset, length = internal_state_offset_and_len
state_slice = slice(offset, offset + length)
causal_mask[..., state_slice, state_slice] = False

sim = sim.masked_fill(causal_mask, max_neg_value(sim))

attn = sim.softmax(dim = -1)
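
In this manual (non-flex-attention) path, positions where causal_mask is True are filled with a large negative value so the softmax assigns them zero weight. A small illustrative snippet, assuming max_neg_value returns the most negative finite value of the tensor's dtype, as is usual for this helper:

import torch

def max_neg_value(t):
    return -torch.finfo(t.dtype).max   # assumed behavior of the helper

sim = torch.randn(1, 1, 4, 4)                               # toy attention logits (b h i j)
causal_mask = torch.ones(4, 4, dtype = torch.bool).triu(1)  # True above the diagonal = disallowed

sim = sim.masked_fill(causal_mask, max_neg_value(sim))
attn = sim.softmax(dim = -1)
print(attn[0, 0])   # each row sums to 1, with zero weight on masked positions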
@@ -976,13 +960,28 @@ def forward(
else:
memory_tokens = actions.new_empty((batch, 0, self.dim))

# pack into [action registers] [actions] [memory tokens (write)]
# joint state + additional internal states

action_tokens, inverse_pack_action_registers = pack_with_inverse([action_register_tokens, action_tokens, memory_tokens], 'b * d')
joint_state_tokens = self.to_joint_state_tokens(joint_state)

action_with_registers_length = action_tokens.shape[-2]
# additional internal state tokens

if not exists(internal_state_tokens):
internal_state_tokens = joint_state_tokens.new_empty((batch, 0, self.dim_internal_state))

internal_state_offset_and_len = None
internal_state_tokens = self.to_internal_state_tokens(internal_state_tokens)

# pack into [action registers] [internal + joint states] [actions] [memory tokens (write)]

action_tokens, inverse_pack_action_registers = pack_with_inverse([
action_register_tokens,
joint_state_tokens,
internal_state_tokens,
action_tokens,
memory_tokens
], 'b * d')

action_with_registers_length = action_tokens.shape[-2]
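
With this change, the joint state and internal state tokens sit between the action registers and the actions, so they land on the bidirectional side of the attention mask rather than needing a separate offset/length carve-out. A rough sketch of the packing, using einops.pack / unpack directly under the assumption that pack_with_inverse is a thin wrapper around them (all shapes below are toy values):

import torch
from einops import pack, unpack

batch, dim = 2, 8

action_register_tokens = torch.randn(batch, 4, dim)
joint_state_tokens     = torch.randn(batch, 1, dim)
internal_state_tokens  = torch.randn(batch, 0, dim)    # may be empty
action_tokens          = torch.randn(batch, 16, dim)
memory_tokens          = torch.randn(batch, 0, dim)    # may be empty

packed, packed_shapes = pack([
    action_register_tokens,
    joint_state_tokens,
    internal_state_tokens,
    action_tokens,
    memory_tokens
], 'b * d')

assert packed.shape == (batch, 4 + 1 + 0 + 16 + 0, dim)

# replaying the recorded shapes splits the sequence back into the same groups,
# in the spirit of the later `*_, action_tokens, written_memory_tokens = inverse_pack_action_registers(...)`
*_, action_tokens_out, memory_tokens_out = unpack(packed, packed_shapes, 'b * d')
assert action_tokens_out.shape == (batch, 16, dim)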

if not inferencing:
# language
@@ -1015,10 +1014,6 @@ def forward(

visual_tokens = self.maybe_to_image_tokens(visual_tokens)

# joint state

joint_state_tokens = self.to_joint_state_tokens(joint_state)

# maybe reward tokens

if not exists(reward_tokens):
@@ -1029,13 +1024,6 @@ def forward(
if self.training and random() < self.reward_tokens_dropout_prob:
reward_tokens = reward_tokens[:, 0:0]

# additional internal state tokens

if not exists(internal_state_tokens):
internal_state_tokens = joint_state_tokens.new_empty((batch, 0, self.dim_internal_state))

internal_state_tokens = self.to_internal_state_tokens(internal_state_tokens)

# additional external states

if exists(external_states):
@@ -1050,30 +1038,13 @@ def forward(
if not exists(past_recurrent_memory_tokens):
past_recurrent_memory_tokens = visual_tokens.new_empty((batch, 0, self.dim))

# allow joint and internal states to have bidirectional attention

internal_state_len = joint_state_tokens.shape[-2] + internal_state_tokens.shape[-2]

internal_state_offset = (
external_state_tokens.shape[-2] +
visual_tokens.shape[-2] +
language_tokens.shape[-2]
)

internal_state_offset_and_len = (
internal_state_offset,
internal_state_len
)

# concat visual rep with language

state_tokens, inverse_packed_states = pack_with_inverse([
past_recurrent_memory_tokens,
external_state_tokens,
visual_tokens,
language_tokens,
joint_state_tokens,
internal_state_tokens,
reward_tokens
], 'b * d')

@@ -1094,7 +1065,7 @@ def forward(

# rotary embeddings

seq = torch.cumsum(mask.float(), dim = -1)
seq = mask.float().cumsum(dim = -1)
rotary_emb = self.rotary_emb(seq)

rotary_emb = rearrange(rotary_emb, 'b n d -> b 1 n d')
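
The positions fed into the rotary embedding come from a cumulative sum over the padding mask rather than a plain arange, so padded slots do not advance the position counter. A tiny illustration (the mask values are made up):

import torch

mask = torch.tensor([[True, True, False, True, True]])   # True = real token, False = padding (assumed convention)

seq = mask.float().cumsum(dim = -1)
print(seq)   # tensor([[1., 2., 2., 3., 4.]]) -- the padded slot reuses the previous position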
@@ -1112,7 +1083,6 @@ def forward(
create_pizero_attn_mask(
prefix_length,
mask = mask,
internal_state_offset_and_len = internal_state_offset_and_len
),
Q_LEN = seq_len,
KV_LEN = seq_len,
@@ -1147,7 +1117,7 @@ def forward(

action_tokens = attn_ada_rmsnorm(action_tokens, time_cond)

(state_attn_out, actions_attn_out), (state_keys, state_values, action_keys, action_values) = attn(state_tokens, action_tokens, rotary_emb = rotary_emb, flex_attn_fn = flex_attn_fn, actions_value_residual = actions_value_residual, mask = mask, internal_state_offset_and_len = internal_state_offset_and_len, return_keys_values = True)
(state_attn_out, actions_attn_out), (state_keys, state_values, action_keys, action_values) = attn(state_tokens, action_tokens, rotary_emb = rotary_emb, flex_attn_fn = flex_attn_fn, actions_value_residual = actions_value_residual, mask = mask, return_keys_values = True)

state_cached_keys_values.append((state_keys, state_values))

@@ -1200,7 +1170,7 @@ def forward(

tokens = self.final_norm_softclamp(tokens)

action_register_tokens, action_tokens, written_memory_tokens = inverse_pack_action_registers(action_tokens)
*_, action_tokens, written_memory_tokens = inverse_pack_action_registers(action_tokens)

action_tokens = self.final_norm_softclamp(action_tokens)
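
final_norm_softclamp is presumably the tanh-style soft clamp used for logit capping (as in Gemma 2), squashing activations smoothly into a bounded range instead of hard-clipping them. A hedged sketch with an assumed clamp value:

import torch

def softclamp(t, value = 50.0):
    # assumed form of the helper: smooth clamp into (-value, value)
    return (t / value).tanh() * value

x = torch.tensor([-1000., -10., 0., 10., 1000.])
print(softclamp(x))   # extremes saturate near +/- 50., small values pass through nearly unchanged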

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "pi-zero-pytorch"
version = "0.0.34"
version = "0.0.36"
description = "π0 in Pytorch"
authors = [
{ name = "Phil Wang", email = "[email protected]" }
