Commit

InfiniGPT config added
ArionDas committed Jun 25, 2024
1 parent c9da909 commit ae52ec2
Showing 4 changed files with 32 additions and 5 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
@@ -2,5 +2,5 @@ torch
pandas
ipykernel
tiktoken
-numpy
+numpy==1.26.4
transformers
15 changes: 13 additions & 2 deletions src/infini_attention.py
@@ -5,8 +5,9 @@
from transformers.modeling_utils import Cache
from transformers import AutoConfig
from rotary_embeddings import RotaryEmbedding
+from infini_gpt_config import INFINIGPT_CONFIG

-### Rotary Embeddings copied from jlamprou repo
+### Rotary Embeddings from jlamprou repo
def rotate_half(x):
"""Rotates half the hidden dims of the input."""
x1 = x[..., : x.shape[-1] // 2]
@@ -137,6 +138,10 @@ def forward(
key_states = self.k_proj(hidden_states)
value_states = self.v_proj(hidden_states)

"""
bsz = batch_size
q_len = sequence_length
"""
query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1,2)
key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
@@ -179,11 +184,17 @@ def forward(
dropout_p=self.attention_dropout if self.training else 0.0,
)

-combined_output = self.long_term_injection_(attn_output, memory_output)
+combined_output = self.long_term_memory_injection_(attn_output, memory_output)

#### output for this segment
combined_output = combined_output.transpose(1,2).contiguous()
combined_output = combined_output.view(bsz, q_len, self.hidden_size)
final_output = self.o_proj(combined_output)
return final_output, None, past_key_value


""" Confusions :
1) **Cache** ?? How to use it to store the past key value states in the input stream?
2) Tensor dimensions have to be matched from the dataloader with that of infini_attention.
"""

14 changes: 14 additions & 0 deletions src/infini_gpt_config.py
@@ -0,0 +1,14 @@
INFINIGPT_CONFIG = {
"vocab_size": 50257, # Vocabulary size
"context_length": 1024, # Context length
"emb_dim": 128, # Embedding dimension
"num_attention_heads": 8, # Number of attention heads
"n_layers": 12, # Number of layers
"drop_rate": 0.1, # Dropout rate
"qkv_bias": False, # Query-Key-Value bias
"hidden_size": 4096, # Hidden size
}

## segment length = 2048
## sequence length = 32768
## num of segments = 32768/2048 = 16
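
For illustration only (not part of this commit), this is how a full-length sequence could be split into the 16 segments described above; segment_length and sequence_length are taken from the comments, not from keys of INFINIGPT_CONFIG.

import torch
from infini_gpt_config import INFINIGPT_CONFIG

segment_length = 2048
sequence_length = 32768
num_segments = sequence_length // segment_length                # 32768 // 2048 = 16

tokens = torch.randint(0, INFINIGPT_CONFIG["vocab_size"], (1, sequence_length))
segments = tokens.split(segment_length, dim=1)                  # 16 tensors of shape (1, 2048)
print(len(segments), segments[0].shape)                         # 16 torch.Size([1, 2048])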
6 changes: 4 additions & 2 deletions src/main.py
@@ -7,15 +7,17 @@
from torch.utils.data import Dataset, DataLoader
from data_preprocessing import InfiniGPTDataset, InfiniGPTDataLoader
from attention import CausalSelfAttention, MultiHeadAttention
+from infini_gpt_config import INFINIGPT_CONFIG


def main():

### Hyperparameters
-vocab_size = 50257
+config = INFINIGPT_CONFIG
+vocab_size = config["vocab_size"]
output_dim = 256
max_length = 4
-context_length = 1024
+context_length = config["context_length"]

### Embeddings
token_embedding_layer = nn.Embedding(vocab_size, output_dim)
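
A quick shape check (not from the commit) of what these hyperparameters imply for the token embeddings; the input batch here is synthetic.

import torch
import torch.nn as nn
from infini_gpt_config import INFINIGPT_CONFIG

vocab_size = INFINIGPT_CONFIG["vocab_size"]
token_embedding_layer = nn.Embedding(vocab_size, 256)           # output_dim = 256, as in main()
batch = torch.randint(0, vocab_size, (8, 4))                    # (batch_size=8, max_length=4)
print(token_embedding_layer(batch).shape)                       # torch.Size([8, 4, 256])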
