Upload tokenizer
- tokenizer.json +0 -0
- tokenizer_config.json +3 -5
tokenizer.json
CHANGED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
CHANGED
@@ -945,17 +945,15 @@
     "<extra_id_0>"
   ],
   "bos_token": "<s>",
-  "clean_up_tokenization_spaces":
+  "clean_up_tokenization_spaces": false,
   "cls_token": "<s>",
   "eos_token": "</s>",
   "errors": "replace",
+  "extra_special_tokens": {},
   "mask_token": "<mask>",
-  "max_length":
+  "max_length": 512,
   "model_max_length": 512,
-  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sep_token": "</s>",
   "stride": 0,
   "tokenizer_class": "RobertaTokenizer",
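
For reference, a minimal sketch (Python, using the transformers library) of how the fields touched by this commit surface once the tokenizer is loaded. The repo id below is a placeholder, and the expected values assume the config shown in the diff above.

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual model repository.
tok = AutoTokenizer.from_pretrained("your-org/your-model")

# Fields changed in this commit (expected values per the diff above).
print(tok.model_max_length)               # 512
print(tok.clean_up_tokenization_spaces)   # False

# With clean_up_tokenization_spaces=False, decode() keeps the spaces
# around punctuation instead of collapsing them during post-processing.
ids = tok("Hello , world !")["input_ids"]
print(tok.decode(ids, skip_special_tokens=True))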