Upload folder using huggingface_hub

- LICENSE +25 -0
- README.md +125 -3
- amplify_te.py +307 -0
- config.json +38 -0
- model.safetensors +3 -0
- special_tokens_map.json +37 -0
- tokenizer.json +154 -0
- tokenizer_config.json +59 -0
LICENSE
ADDED
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 chandar-lab
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: MIT

MIT License

Copyright (c) 2024 chandar-lab

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,3 +1,125 @@
---
license: mit
datasets:
- chandar-lab/UR100P
language:
- en
tags:
- biology
---

> [!NOTE]
> This model has been optimized using NVIDIA's [TransformerEngine](https://github.com/NVIDIA/TransformerEngine)
> library. Slight numerical differences may be observed between the original model and the optimized
> model. For instructions on how to install TransformerEngine, please refer to the
> [official documentation](https://github.com/NVIDIA/TransformerEngine?tab=readme-ov-file#installation).
>
> The original xformers-based models are available at [chandar-lab/AMPLIFY](https://huggingface.co/chandar-lab/AMPLIFY_350M).
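A quick way to gauge the numerical gap mentioned above is to run the same sequence through this checkpoint and the xformers original and compare the outputs. A minimal sketch (the short test sequence is made up, and the output attribute names follow the TE port in `amplify_te.py`; the reference implementation may expose its outputs slightly differently):

```python
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("nvidia/AMPLIFY_350M", trust_remote_code=True)
te_model = AutoModel.from_pretrained("nvidia/AMPLIFY_350M", trust_remote_code=True).to("cuda").eval()
ref_model = AutoModel.from_pretrained("chandar-lab/AMPLIFY_350M", trust_remote_code=True).to("cuda").eval()

ids = tokenizer.encode("MSVTREQ", return_tensors="pt").to("cuda")  # made-up sequence
with torch.no_grad():
    te_out = te_model(ids).last_hidden_state
    ref_out = ref_model(ids).last_hidden_state  # attribute name assumed for the reference model
print("max abs difference:", (te_out - ref_out).abs().max().item())
```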
## AMPLIFY

AMPLIFY is an efficient, state-of-the-art protein language model pre-trained using masked language modeling on UniRef100, OAS, and SCOP ([UR100P](https://huggingface.co/datasets/chandar-lab/UR100P)). AMPLIFY can generate residue and protein embeddings, suggest mutations, differentiate disordered proteins from non-protein sequences, and much more. AMPLIFY is available in two sizes, 120M and 350M parameters, with the `_base` models not extended beyond 512 residues (Stage 1). The model architecture and pre-training procedure are detailed below. For more details, please refer to the [accompanying paper](https://www.biorxiv.org/content/10.1101/2024.09.23.614603v1).

- [`AMPLIFY_350M`](https://huggingface.co/nvidia/AMPLIFY_350M)
- [`AMPLIFY_350M_base`](https://huggingface.co/chandar-lab/AMPLIFY_350M_base)
- [`AMPLIFY_120M`](https://huggingface.co/nvidia/AMPLIFY_120M)
- [`AMPLIFY_120M_base`](https://huggingface.co/chandar-lab/AMPLIFY_120M_base)

### Model Description

|                                | AMPLIFY 120M | AMPLIFY 350M |
| :----------------------------- | -----------: | -----------: |
| `hidden-size`                  | 640          | 960          |
| `num-hidden-layers`            | 24           | 32           |
| `num-attention-heads`          | 10           | 15           |
| `intermediate-size`            | 2560         | 3840         |
| `max-position-embeddings`      | 2048         | 2048         |
| `vocab-size`                   | 27           | 27           |
| `rope-theta`                   | 10000        | 10000        |
| `dropout-prob`                 | 0            | 0            |
| `embedding-init-range`         | 0.02         | 0.02         |
| `norm-eps`                     | 1.0e-05      | 1.0e-05      |
| `hidden-act`                   | swiglu       | swiglu       |
| `pre-activation-layer-norm`    | true         | true         |
| `layer-norm-after-embedding`   | false        | false        |
| `layer-norm-before-last-layer` | true         | true         |
| `rms-norm`                     | true         | true         |
| `ffn-bias`                     | false        | false        |
| `attn-bias`                    | false        | false        |

### Training Description

|                     | Stage 1     | Stage 2                      |
| :------------------ | ----------: | ---------------------------: |
| `dataset`           | UR100P      | UR100P                       |
| `max-steps`         | 1000000     | 25000 (120M) or 50000 (350M) |
| `max-length`        | 512         | 2048                         |
| `optimizer`         | adamw       | adamw                        |
| `lr`                | 0.001       | 0.0001                       |
| `betas`             | (0.9, 0.95) | (0.9, 0.95)                  |
| `eps`               | 1.0e-08     | 1.0e-08                      |
| `weight-decay`      | 0.01        | 0.01                         |
| `scheduler`         | cosinedecay | none                         |
| `warmup-steps`      | 1,000       | none                         |
| `final-step`        | 900,000     | none                         |
| `gradient-clipping` | 1.0         | 1.0                          |
| `tf32`              | true        | true                         |
| `mixed-precision`   | bf16        | bf16                         |
| `padding`           | max-length  | max-length                   |
| `random-truncate`   | true        | true                         |
| `mask-probability`  | 0.15        | 0.15                         |
| `total-batch-size`  | 4096        | 4096                         |
| `deepspeed`         | true        | true                         |
| `zero-stage`        | 3           | 3                            |
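For reference, the Stage 1 optimizer and schedule rows above translate to roughly the following PyTorch setup. This is a sketch under stated assumptions: the exact warmup shape and decay floor are not given in the table, so linear warmup and a plain cosine to zero are assumed, and `model` is a stand-in module:

```python
import math
import torch

model = torch.nn.Linear(8, 8)  # stand-in for the actual AMPLIFY model

# Stage 1 rows from the table: adamw, lr 0.001, betas (0.9, 0.95), eps 1e-8, weight decay 0.01.
optimizer = torch.optim.AdamW(
    model.parameters(), lr=0.001, betas=(0.9, 0.95), eps=1e-08, weight_decay=0.01
)

warmup_steps, final_step = 1_000, 900_000

def lr_lambda(step: int) -> float:
    # Linear warmup over the first 1,000 steps (assumed shape), then cosine decay to step 900,000.
    if step < warmup_steps:
        return step / warmup_steps
    progress = min(1.0, (step - warmup_steps) / (final_step - warmup_steps))
    return 0.5 * (1.0 + math.cos(math.pi * progress))

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

# Per-step clipping matching the `gradient-clipping` row:
# torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
```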
## Get Started

```python
from transformers import AutoModel
from transformers import AutoTokenizer
from datasets import load_dataset

# Load AMPLIFY and tokenizer
model = AutoModel.from_pretrained("nvidia/AMPLIFY_350M", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("nvidia/AMPLIFY_350M", trust_remote_code=True)

# Move the model to GPU (required due to Flash Attention)
model = model.to("cuda")

# Load the UniProt validation set
dataset = load_dataset("chandar-lab/UR100P", data_dir="UniProt", split="test")

for sample in dataset:
    # Protein
    print("Sample: ", sample["name"], sample["sequence"])

    # Tokenize the protein
    input = tokenizer.encode(sample["sequence"], return_tensors="pt")
    print("Input: ", input)

    # Move to the GPU and make a prediction
    input = input.to("cuda")
    output = model(input)
    print("Output: ", output)

    break
```
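Beyond raw hidden states, the "suggest mutations" use case from the intro maps directly onto the masked-LM head: mask a position and read the distribution over residues at that position. A sketch (the sequence and position are made up; the `logits` indexing accounts for the `<bos>` token the tokenizer prepends):

```python
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("nvidia/AMPLIFY_350M", trust_remote_code=True)
model = AutoModelForMaskedLM.from_pretrained("nvidia/AMPLIFY_350M", trust_remote_code=True).to("cuda").eval()

seq = "MSVTREQLLK"  # made-up sequence
pos = 4             # 0-based index of the residue to score (here "R")

# Replace the residue of interest with <mask> and run the MLM head.
tokens = list(seq)
tokens[pos] = tokenizer.mask_token
ids = tokenizer.encode("".join(tokens), return_tensors="pt").to("cuda")
with torch.no_grad():
    logits = model(ids).logits

# pos + 1 skips the <bos> token added by the post-processor.
probs = logits[0, pos + 1].softmax(-1)
for aa in ("R", "K", "A"):
    print(aa, f"{probs[tokenizer.convert_tokens_to_ids(aa)].item():.4f}")
```

Per-protein embeddings can be obtained analogously by mean-pooling `last_hidden_state` from the `AutoModel` example above over the residue positions.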
## Citations

If you find the models useful in your research, we ask that you cite the paper:

```bibtex
@article{Fournier2024.09.23.614603,
  title = {Protein Language Models: Is Scaling Necessary?},
  author = {Fournier, Quentin and Vernon, Robert M. and van der Sloot, Almer and Schulz, Benjamin and Chandar, Sarath and Langmead, Christopher James},
  year = {2024},
  journal = {bioRxiv},
  publisher = {Cold Spring Harbor Laboratory},
  doi = {10.1101/2024.09.23.614603},
  url = {https://www.biorxiv.org/content/early/2024/09/23/2024.09.23.614603},
  elocation-id = {2024.09.23.614603},
  eprint = {https://www.biorxiv.org/content/early/2024/09/23/2024.09.23.614603.full.pdf}
}
```
amplify_te.py
ADDED
@@ -0,0 +1,307 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 chandar-lab
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: MIT
#
# Adapted from https://huggingface.co/chandar-lab/AMPLIFY_120M/blob/main/amplify.py

import torch
import transformer_engine.pytorch
from torch import nn
from transformer_engine.pytorch.attention.rope import RotaryPositionEmbedding
from transformers.configuration_utils import PretrainedConfig
from transformers.modeling_outputs import BaseModelOutput, MaskedLMOutput
from transformers.modeling_utils import PreTrainedModel


class AMPLIFYConfig(PretrainedConfig):
    """AMPLIFY model configuration."""

    model_type = "AMPLIFY"

    # All config parameters must have a default value.
    def __init__(
        self,
        hidden_size: int = 960,
        num_hidden_layers: int = 32,
        num_attention_heads: int = 15,
        intermediate_size: int = 3840,
        dropout_prob: float = 0,
        embedding_init_range: float = 0.02,
        decoder_init_range: float = 0.02,
        rms_norm: bool = True,
        norm_eps: float = 1e-05,
        hidden_act: str = "SwiGLU",
        layer_norm_after_embedding: bool = False,
        layer_norm_before_last_layer: bool = True,
        vocab_size: int = 27,
        ffn_bias: bool = False,
        att_bias: bool = False,
        pad_token_id: int = 0,
        max_length: int = 2048,
        **kwargs,
    ):
        """Initialize an AMPLIFYConfig.

        Args:
            hidden_size (int): The hidden size of the model.
            num_hidden_layers (int): The number of hidden layers in the model.
            num_attention_heads (int): The number of attention heads in the model.
            intermediate_size (int): The intermediate size of the model.
            dropout_prob (float): The dropout probability of the model.
            embedding_init_range (float): The range of the embedding initialization.
            decoder_init_range (float): The range of the decoder initialization.
            rms_norm (bool): Whether to use RMSNorm.
            norm_eps (float): The epsilon for the normalization.
            hidden_act (str): The activation function of the model.
            layer_norm_after_embedding (bool): Whether to use layer normalization after the embedding.
            layer_norm_before_last_layer (bool): Whether to use layer normalization before the last layer.
            vocab_size (int): The vocabulary size of the model.
            ffn_bias (bool): Whether to use bias in the feedforward network.
            att_bias (bool): Whether to use bias in the attention.
            pad_token_id (int): The padding token id.
            max_length (int): The maximum length of the sequence.
            **kwargs: Additional arguments.
        """
        super().__init__(**kwargs)

        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.dropout_prob = dropout_prob
        self.embedding_init_range = embedding_init_range
        self.decoder_init_range = decoder_init_range
        self.rms_norm = rms_norm
        self.norm_eps = norm_eps
        self.hidden_act = hidden_act
        self.layer_norm_after_embedding = layer_norm_after_embedding
        self.layer_norm_before_last_layer = layer_norm_before_last_layer
        self.vocab_size = vocab_size
        self.ffn_bias = ffn_bias
        self.att_bias = att_bias
        self.pad_token_id = pad_token_id
        self.max_length = max_length


class AMPLIFYPreTrainedModel(PreTrainedModel):
    """AMPLIFY pre-trained model."""

    config_class = AMPLIFYConfig

    def _init_weights(self, module):
        if isinstance(
            module, (nn.Linear, transformer_engine.pytorch.Linear, transformer_engine.pytorch.LayerNormLinear)
        ):
            module.weight.data.uniform_(-self.config.decoder_init_range, self.config.decoder_init_range)
            if module.bias is not None:
                module.bias.data.zero_()
        if isinstance(module, nn.Embedding):
            module.weight.data.uniform_(-self.config.embedding_init_range, self.config.embedding_init_range)


class AMPLIFY(AMPLIFYPreTrainedModel):
    """The main model class."""

    def __init__(self, config: AMPLIFYConfig, **kwargs):
        """Initialize an AMPLIFY model.

        Args:
            config (AMPLIFYConfig): The configuration of the model.
            **kwargs: Additional arguments.
        """
        super().__init__(config)

        self.config = config

        self.encoder = nn.Embedding(
            config.vocab_size,
            config.hidden_size,
            padding_idx=config.pad_token_id,
            dtype=config.torch_dtype,
        )

        if config.layer_norm_after_embedding:
            self.layer_norm_1 = (
                transformer_engine.pytorch.RMSNorm(
                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
                )
                if config.rms_norm
                else transformer_engine.pytorch.LayerNorm(
                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
                )
            )

        if config.hidden_act.lower() == "swiglu":
            # To keep the number of parameters and the amount of computation constant, we reduce the
            # number of hidden units by a factor of 2/3 (https://arxiv.org/pdf/2002.05202.pdf) and
            # make it a multiple of 8 to avoid RuntimeError due to misaligned operand
            multiple_of = 8
            intermediate_size = int(2 * config.intermediate_size / 3)
            intermediate_size = multiple_of * ((intermediate_size + multiple_of - 1) // multiple_of)

        self.transformer_encoder = nn.ModuleList()
        for layer_num in range(config.num_hidden_layers):
            self.transformer_encoder.append(
                transformer_engine.pytorch.TransformerLayer(
                    hidden_size=config.hidden_size,
                    ffn_hidden_size=intermediate_size,
                    num_attention_heads=config.num_attention_heads,
                    layernorm_epsilon=config.norm_eps,
                    hidden_dropout=config.dropout_prob,
                    attention_dropout=config.dropout_prob,
                    apply_residual_connection_post_layernorm=False,
                    layer_type="encoder",
                    self_attn_mask_type="padding",
                    normalization="RMSNorm" if config.rms_norm else "LayerNorm",
                    fuse_qkv_params=True,
                    qkv_weight_interleaved=True,
                    output_layernorm=False,
                    bias=False,
                    activation=config.hidden_act.lower(),
                    attn_input_format="bshd",
                    layer_number=layer_num + 1,
                    name="encoder_block",
                    window_size=(-1, -1),
                    rotary_pos_interleaved=True,
                    seq_length=config.max_length,
                    params_dtype=config.torch_dtype,
                )
            )

        self.freqs_cis = RotaryPositionEmbedding(config.hidden_size // config.num_attention_heads, interleaved=True)(
            config.max_length
        )

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids,
        attention_mask=None,
        output_hidden_states=False,
        output_attentions=False,
        labels=None,
        **kwargs,
    ) -> BaseModelOutput:
        """Forward pass of the AMPLIFY model.

        Args:
            input_ids (torch.Tensor): The input ids.
            attention_mask (torch.Tensor): The attention mask.
            output_hidden_states (bool): Whether to output the hidden states.
            output_attentions (bool): Whether to output the attention weights.
            labels (torch.Tensor): The labels.
            **kwargs: Additional arguments.

        Returns:
            BaseModelOutput: The output of the model.
        """
        # Initialize
        hidden_states = []

        # Attention mask
        if attention_mask is not None and attention_mask.dtype is torch.int64:
            # TE expects a boolean attention mask, where "True" indicates a token to be masked.
            attention_mask = ~attention_mask.to(bool)

        # RoPE
        self.freqs_cis = self.freqs_cis.to(input_ids.device, non_blocking=True)
        freqs_cis = self.freqs_cis[: input_ids.shape[1]]

        # Embedding
        x = self.encoder(input_ids)
        if self.config.layer_norm_after_embedding:
            x = self.layer_norm_1(x)

        # Transformer encoder
        for layer in self.transformer_encoder:
            x = layer(x, attention_mask, rotary_pos_emb=freqs_cis)
            if output_hidden_states:
                hidden_states.append(x)
        if output_attentions:
            raise ValueError("output_attentions is not supported for TE")

        return BaseModelOutput(
            last_hidden_state=x,
            hidden_states=tuple(hidden_states) if hidden_states else None,
            attentions=None,
        )


class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
    """AMPLIFY for masked language modeling."""

    def __init__(self, config: AMPLIFYConfig, **kwargs):
        """Initialize an AMPLIFYForMaskedLM model.

        Args:
            config (AMPLIFYConfig): The configuration of the model.
            **kwargs: Additional arguments.
        """
        super().__init__(config)
        self.amplify = AMPLIFY(config, **kwargs)

        if config.layer_norm_before_last_layer:
            self.decoder = transformer_engine.pytorch.LayerNormLinear(
                config.hidden_size,
                config.vocab_size,
                config.norm_eps,
                params_dtype=config.torch_dtype,
                normalization="RMSNorm" if config.rms_norm else "LayerNorm",
                init_method=lambda x: torch.nn.init.uniform_(
                    x, -self.config.decoder_init_range, self.config.decoder_init_range
                ),
            )
        else:
            self.decoder = transformer_engine.pytorch.Linear(
                config.hidden_size, config.vocab_size, params_dtype=config.torch_dtype
            )

    def forward(
        self,
        input_ids,
        attention_mask=None,
        output_hidden_states=False,
        output_attentions=False,
        labels=None,
        **kwargs,
    ) -> MaskedLMOutput:
        """Forward pass of the AMPLIFYForMaskedLM model.

        Args:
            input_ids (torch.Tensor): The input ids.
            attention_mask (torch.Tensor): The attention mask.
            output_hidden_states (bool): Whether to output the hidden states.
            output_attentions (bool): Whether to output the attention weights.
            labels (torch.Tensor): The labels.
            **kwargs: Additional arguments.

        Returns:
            MaskedLMOutput: The output of the model.
        """
        outputs = self.amplify(
            input_ids,
            attention_mask,
            output_hidden_states,
            output_attentions,
            labels,
            **kwargs,
        )

        # Classification head with layer norm
        logits = self.decoder(outputs.last_hidden_state)

        if labels is not None:
            loss = nn.functional.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
        else:
            loss = None

        # Return logits or the output of the last hidden layer
        return MaskedLMOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
        )
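The SwiGLU branch in `AMPLIFY.__init__` shrinks `intermediate_size` by a factor of 2/3 and rounds up to a multiple of 8; the arithmetic is easy to check by hand. A small standalone sketch of that computation for the two published sizes:

```python
# Reproduces the ffn_hidden_size computation from AMPLIFY.__init__ for the
# intermediate_size values in the README tables (3840 for 350M, 2560 for 120M).
multiple_of = 8
for intermediate_size in (3840, 2560):
    reduced = int(2 * intermediate_size / 3)  # 2560 and 1706
    ffn_hidden_size = multiple_of * ((reduced + multiple_of - 1) // multiple_of)
    print(intermediate_size, "->", ffn_hidden_size)  # 3840 -> 2560, 2560 -> 1712
```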
config.json
ADDED
@@ -0,0 +1,38 @@
{
  "_name_": "AMPLIFY",
  "architectures": [
    "AMPLIFYForMaskedLM"
  ],
  "att_bias": false,
  "auto_map": {
    "AutoConfig": "amplify_te.AMPLIFYConfig",
    "AutoModel": "amplify_te.AMPLIFY",
    "AutoModelForMaskedLM": "amplify_te.AMPLIFYForMaskedLM"
  },
  "bos_token_id": 3,
  "decoder_init_range": 0.02,
  "dropout_prob": 0,
  "embedding_init_range": 0.02,
  "eos_token_id": 4,
  "ffn_bias": false,
  "hidden_act": "SwiGLU",
  "hidden_size": 640,
  "intermediate_size": 2560,
  "layer_norm_after_embedding": false,
  "layer_norm_before_last_layer": true,
  "mask_token_id": 2,
  "max_length": 2048,
  "model_type": "AMPLIFY",
  "norm_eps": 1e-05,
  "num_attention_heads": 10,
  "num_hidden_layers": 24,
  "other_special_token_ids": null,
  "pad_token_id": 0,
  "pre_activation_layer_norm": true,
  "rms_norm": true,
  "torch_dtype": "float32",
  "transformers_version": "4.54.0.dev0",
  "unk_token_id": 1,
  "vocab_path": "conf/tokenizer/amplify_vocab.txt",
  "vocab_size": 27
}
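Because of the `auto_map` entries, `AutoConfig` resolves this file to the `AMPLIFYConfig` class from `amplify_te.py`. Note that the values here (`hidden_size` 640, 24 layers, 10 heads) match the 120M column of the README table. A minimal check, assuming this card's repo id is `nvidia/AMPLIFY_120M`:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("nvidia/AMPLIFY_120M", trust_remote_code=True)
print(type(config).__name__)  # AMPLIFYConfig (via the auto_map above)
print(config.hidden_size, config.num_hidden_layers, config.num_attention_heads)  # 640 24 10
```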
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c24689ec4865382b883b0f7bfbb4b504dc3d671c71270dcd209422fa53553df
size 473138596
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "bos_token": {
    "content": "<bos>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<eos>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
@@ -0,0 +1,154 @@
{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "<pad>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<mask>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "<bos>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "<eos>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "String": ""
    },
    "behavior": "Removed",
    "invert": false
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      {
        "SpecialToken": {
          "id": "<bos>",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "<eos>",
          "type_id": 0
        }
      }
    ],
    "pair": [
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "B",
          "type_id": 1
        }
      }
    ],
    "special_tokens": {
      "<bos>": {
        "id": "<bos>",
        "ids": [
          3
        ],
        "tokens": [
          "<bos>"
        ]
      },
      "<eos>": {
        "id": "<eos>",
        "ids": [
          4
        ],
        "tokens": [
          "<eos>"
        ]
      }
    }
  },
  "decoder": null,
  "model": {
    "type": "WordPiece",
    "unk_token": "<unk>",
    "continuing_subword_prefix": "##",
    "max_input_chars_per_word": 100,
    "vocab": {
      "<pad>": 0,
      "<unk>": 1,
      "<mask>": 2,
      "<bos>": 3,
      "<eos>": 4,
      "|": 5,
      "L": 6,
      "A": 7,
      "G": 8,
      "V": 9,
      "S": 10,
      "E": 11,
      "R": 12,
      "T": 13,
      "I": 14,
      "D": 15,
      "P": 16,
      "K": 17,
      "Q": 18,
      "N": 19,
      "F": 20,
      "Y": 21,
      "M": 22,
      "H": 23,
      "W": 24,
      "C": 25,
      "B": 26
    }
  }
}
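The pieces above compose as follows: the `Split` pre-tokenizer with an empty-string pattern splits a sequence into single characters, the `WordPiece` vocab maps each residue to its id, and `TemplateProcessing` wraps the result in `<bos>`/`<eos>`. A quick check (ids read off the vocab above):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("nvidia/AMPLIFY_350M", trust_remote_code=True)
enc = tokenizer("MSVT")
print(enc.input_ids)  # [3, 22, 10, 9, 13, 4] -> <bos> M S V T <eos>
print(tokenizer.convert_ids_to_tokens(enc.input_ids))
```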
tokenizer_config.json
ADDED
@@ -0,0 +1,59 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<mask>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<bos>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<eos>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<bos>",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<eos>",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "model_input_names": [
    "input_ids",
    "attention_mask"
  ],
  "model_max_length": 2048,
  "pad_token": "<pad>",
  "padding_side": "right",
  "tokenizer_class": "PreTrainedTokenizerFast",
  "truncation_side": "right",
  "unk_token": "<unk>"
}