lhallee committed on
Commit
7bfd0cd
·
verified ·
1 Parent(s): 2600ac5

Upload modeling_dplm2.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_dplm2.py +3 -5
modeling_dplm2.py CHANGED
@@ -365,9 +365,9 @@ import torch
365
  import torch.nn as nn
366
  from torch.nn import functional as F
367
  from dataclasses import dataclass
368
- from typing import Dict, List, Optional, Tuple, Union
369
 
370
- from transformers import AutoTokenizer, EsmTokenizer
371
  from transformers.modeling_outputs import (
372
  BaseModelOutputWithPastAndCrossAttentions,
373
  BaseModelOutputWithPoolingAndCrossAttentions,
@@ -1062,7 +1062,6 @@ class FAST_DPLM2_ENCODER(DPLM2PreTrainedModel, EmbeddingMixin):
1062
 
1063
  class DPLM2Model(DPLM2PreTrainedModel, EmbeddingMixin):
1064
  config_class = DPLM2Config
1065
-
1066
  def __init__(self, config, add_pooling_layer=True):
1067
  DPLM2PreTrainedModel.__init__(self, config)
1068
  self.config = config
@@ -1129,7 +1128,6 @@ class DPLM2Model(DPLM2PreTrainedModel, EmbeddingMixin):
1129
 
1130
  class DPLM2ForMaskedLM(DPLM2PreTrainedModel, EmbeddingMixin):
1131
  config_class = DPLM2Config
1132
-
1133
  def __init__(self, config, dropout: float = 0.1, vocab_size: Optional[int] = None):
1134
  config.hidden_dropout_prob = dropout
1135
  config.tie_word_embeddings = False
@@ -1143,7 +1141,7 @@ class DPLM2ForMaskedLM(DPLM2PreTrainedModel, EmbeddingMixin):
1143
  self.pad_id = config.pad_token_id
1144
  self.tokenizer = self.__class__.tokenizer
1145
  if isinstance(config._name_or_path, str) and len(config._name_or_path) > 0:
1146
- self.tokenizer = AutoTokenizer.from_pretrained(config._name_or_path)
1147
 
1148
  def get_input_embeddings(self) -> nn.Module:
1149
  return self.esm.get_input_embeddings()
 
365
  import torch.nn as nn
366
  from torch.nn import functional as F
367
  from dataclasses import dataclass
368
+ from typing import List, Optional, Tuple, Union
369
 
370
+ from transformers import EsmTokenizer
371
  from transformers.modeling_outputs import (
372
  BaseModelOutputWithPastAndCrossAttentions,
373
  BaseModelOutputWithPoolingAndCrossAttentions,
 
1062
 
1063
  class DPLM2Model(DPLM2PreTrainedModel, EmbeddingMixin):
1064
  config_class = DPLM2Config
 
1065
  def __init__(self, config, add_pooling_layer=True):
1066
  DPLM2PreTrainedModel.__init__(self, config)
1067
  self.config = config
 
1128
 
1129
  class DPLM2ForMaskedLM(DPLM2PreTrainedModel, EmbeddingMixin):
1130
  config_class = DPLM2Config
 
1131
  def __init__(self, config, dropout: float = 0.1, vocab_size: Optional[int] = None):
1132
  config.hidden_dropout_prob = dropout
1133
  config.tie_word_embeddings = False
 
1141
  self.pad_id = config.pad_token_id
1142
  self.tokenizer = self.__class__.tokenizer
1143
  if isinstance(config._name_or_path, str) and len(config._name_or_path) > 0:
1144
+ self.tokenizer = EsmTokenizer.from_pretrained(config._name_or_path)
1145
 
1146
  def get_input_embeddings(self) -> nn.Module:
1147
  return self.esm.get_input_embeddings()