from transformers import PretrainedConfig


class LIMEConfig(PretrainedConfig):
    """Configuration for the LIME model, following the Hugging Face convention."""

    model_type = "lime"

    def __init__(
        self,
        vocab_size=50000,
        d_model=1536,
        num_encoder_layers=0,
        num_decoder_layers=32,
        num_heads=24,
        dff=6144,
        dropout_rate=0.0,
        max_position_embeddings=512,
        pad_token_id=0,
        eos_token_id=1,
        use_encoder=False,
        use_flash=True,
        multiple_of=256,
        **kwargs
    ):
        super().__init__(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            **kwargs
        )
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.num_encoder_layers = num_encoder_layers
        self.num_decoder_layers = num_decoder_layers
        self.num_heads = num_heads
        self.dff = dff
        self.dropout_rate = dropout_rate
        self.max_position_embeddings = max_position_embeddings
        self.pad_token_id = pad_token_id
        self.eos_token_id = eos_token_id
        self.use_encoder = use_encoder
        self.use_flash = use_flash
        self.multiple_of = multiple_of

        # Flags the Transformers library reads to treat this as a
        # decoder-only model with tied input/output embeddings.
        self.is_decoder = True
        self.is_encoder_decoder = False
        self.tie_word_embeddings = True
        self.use_cache = False
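

# A minimal usage sketch, assuming only that `transformers` is installed and
# the class above is importable; the "lime-config" directory name is made up
# for illustration. `save_pretrained`/`from_pretrained` are inherited from
# `PretrainedConfig`, so the config round-trips through a plain config.json.
if __name__ == "__main__":
    config = LIMEConfig(num_decoder_layers=16)  # override one default
    print(config.model_type)                    # -> "lime"
    config.save_pretrained("lime-config")       # writes lime-config/config.json
    reloaded = LIMEConfig.from_pretrained("lime-config")
    assert reloaded.num_decoder_layers == 16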