outlander23 committed
Commit 8d90a1e · verified · 1 parent: f4ad4ed

Upload tokenizer

Files changed (2)
  1. tokenizer.json +0 -0
  2. tokenizer_config.json +3 -5
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -945,17 +945,15 @@
     "<extra_id_0>"
   ],
   "bos_token": "<s>",
-  "clean_up_tokenization_spaces": true,
+  "clean_up_tokenization_spaces": false,
   "cls_token": "<s>",
   "eos_token": "</s>",
   "errors": "replace",
+  "extra_special_tokens": {},
   "mask_token": "<mask>",
-  "max_length": 128,
+  "max_length": 512,
   "model_max_length": 512,
-  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sep_token": "</s>",
   "stride": 0,
   "tokenizer_class": "RobertaTokenizer",