Update configuration_progen.py
configuration_progen.py  CHANGED  (+6 −21)
@@ -29,7 +29,7 @@ class ProGenConfig(PretrainedConfig):
         vocab_size_emb=32,
         vocab_size_lm_head=32,
         n_positions=1024,
-        n_embd=1024,
+        embed_dim=1024,
         n_layer=12,
         n_head=16,
         rotary_dim=32,
@@ -49,13 +49,13 @@ class ProGenConfig(PretrainedConfig):
     ):
         super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)

-        self.vocab_size_emb = vocab_size_emb
-        self.vocab_size_lm_head = vocab_size_lm_head
-        self.n_positions = n_positions
-        self.n_embd = n_embd
+        self.vocab_size_emb = vocab_size_emb  # input vocab size
+        self.vocab_size_lm_head = vocab_size_lm_head  # output vocab size
+        self.n_positions = n_positions  # context window size
+        self.embed_dim = embed_dim
         self.n_layer = n_layer
         self.n_head = n_head
-        self.n_inner = n_inner
+        self.n_inner = n_inner  # inner dimension of the MLP
         self.rotary_dim = rotary_dim
         self.activation_function = activation_function
         self.resid_pdrop = resid_pdrop
@@ -70,18 +70,3 @@ class ProGenConfig(PretrainedConfig):
         self.bos_token_id = bos_token_id
         self.eos_token_id = eos_token_id

-    @property
-    def max_position_embeddings(self):
-        return self.n_positions
-
-    @property
-    def hidden_size(self):
-        return self.n_embd
-
-    @property
-    def num_attention_heads(self):
-        return self.n_head
-
-    @property
-    def num_hidden_layers(self):
-        return self.n_layer
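For reference, a minimal usage sketch of the updated config, assuming the file is importable as the module configuration_progen (the import path is an assumption, not part of this commit). After this change the embedding width is exposed as config.embed_dim, and because the convenience properties (max_position_embeddings, hidden_size, num_attention_heads, num_hidden_layers) were removed, callers must read the underlying attributes directly:

# Minimal sketch; assumes this file is importable as `configuration_progen`.
from configuration_progen import ProGenConfig

config = ProGenConfig(
    vocab_size_emb=32,
    vocab_size_lm_head=32,
    n_positions=1024,
    embed_dim=1024,   # renamed from n_embd in this commit
    n_layer=12,
    n_head=16,
    rotary_dim=32,
)

# The convenience properties are gone, so use the base attributes:
hidden = config.embed_dim       # previously config.hidden_size
heads = config.n_head           # previously config.num_attention_heads
layers = config.n_layer         # previously config.num_hidden_layers
context = config.n_positions    # previously config.max_position_embeddings

Any downstream code that still relies on the removed property names would need the same substitution.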