discourse-ai/tokenizers/bert-base-uncased.json

1 line
455 KiB
JSON
Raw Permalink Normal View History

{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[PAD]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":100,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":101,"special":true,"content":"[CLS]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":102,"special":true,"content":"[SEP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":103,"special":true,"content":"[MASK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":{"type":"BertNormalizer","clean_text":true,"handle_chinese_chars":true,"strip_accents":null,"lowercase":true},"pre_tokenizer":{"type":"BertPreTokenizer"},"post_processor":{"type":"TemplateProcessing","single":[{"SpecialToken":{"id":"[CLS]","type_id":0}},{"Sequence":{"id":"A","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}}],"pair":[{"SpecialToken":{"id":"[CLS]","type_id":0}},{"Sequence":{"id":"A","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}},{"Sequence":{"id":"B","type_id":1}},{"SpecialToken":{"id":"[SEP]","type_id":1}}],"special_tokens":{"[CLS]":{"id":"[CLS]","ids":[101],"tokens":["[CLS]"]},"[SEP]":{"id":"[SEP]","ids":[102],"tokens":["[SEP]"]}}},"decoder":{"type":"WordPiece","prefix":"##","cleanup":true},"model":{"unk_token":"[UNK]","continuing_subword_prefix":"##","max_input_chars_per_word":100,"vocab":{"[PAD]":0,"[unused0]":1,"[unused1]":2,"[unused2]":3,"[unused3]":4,"[unused4]":5,"[unused5]":6,"[unused6]":7,"[unused7]":8,"[unused8]":9,"[unused9]":10,"[unused10]":11,"[unused11]":12,"[unused12]":13,"[unused13]":14,"[unused14]":15,"[unused15]":16,"[unused16]":17,"[unused17]":18,"[unused18]":19,"[unused19]":20,"[unused20]":21,"[unused21]":22,"[unused22]":23,"[unused23]":24,"[unused24]":25,"[unused25]":26,"[unused26]":27,"[unused27]":28,"[unused28]":29,"[unused29]":30,"[unused30]":31,"[unused31]":32,"[unused32]":33,"[unused33]":34,"[unused34]":35,"[unused35]":36,"[unused36]":37,"[unused37]":38,"[unused38]":39,"[unused39]":40,"[unused40]":41,"[unused41]":42,"[unused42]":43,"[unused43]":44,"[unused44]":45,"[unused45]":46,"[unused46]":47,"[unused47]":48,"[unused48]":49,"[unused49]":50,"[unused50]":51,"[unused51]":52,"[unused52]":53,"[unused53]":54,"[unused54]":55,"[unused55]":56,"[unused56]":57,"[unused57]":58,"[unused58]":59,"[unused59]":60,"[unused60]":61,"[unused61]":62,"[unused62]":63,"[unused63]":64,"[unused64]":65,"[unused65]":66,"[unused66]":67,"[unused67]":68,"[unused68]":69,"[unused69]":70,"[unused70]":71,"[unused71]":72,"[unused72]":73,"[unused73]":74,"[unused74]":75,"[unused75]":76,"[unused76]":77,"[unused77]":78,"[unused78]":79,"[unused79]":80,"[unused80]":81,"[unused81]":82,"[unused82]":83,"[unused83]":84,"[unused84]":85,"[unused85]":86,"[unused86]":87,"[unused87]":88,"[unused88]":89,"[unused89]":90,"[unused90]":91,"[unused91]":92,"[unused92]":93,"[unused93]":94,"[unused94]":95,"[unused95]":96,"[unused96]":97,"[unused97]":98,"[unused98]":99,"[UNK]":100,"[CLS]":101,"[SEP]":102,"[MASK]":103,"[unused99]":104,"[unused100]":105,"[unused101]":106,"[unused102]":107,"[unused103]":108,"[unused104]":109,"[unused105]":110,"[unused106]":111,"[unused107]":112,"[unused108]":113,"[unused109]":114,"[unused110]":115,"[unused111]":116,"[unused112]":117,"[unused113]":118,"[unused114]":119,"[unused115]":120,"[unused116]":121,"[unused117]":122,"[unused118]":123,"[unused119]":124,"[unused120]":125,"[unused121]":126,"[unused122]":127,"[unused123]":128,"[unused124]":129,"[unused125]":130,"[unused126]":131,"[unused127]":132,"[unused128]":133,"[unused129]":134,"[unused130]":135,"[unused131]":136,"[unused132]":137,"[unused133]":138,"[unused134]":139,"[unused135]":140,"[unused136]":141,"[unused137]":142,"[unused138]":143,"[unused139]":144,"[unused140]":145,"[unused141]":146,"[unused142]":147,"[unused143]":148,"[unused144]":149,"[unused145]":150,"[unused146]":151,"[unused147]":152,"[unused148]":153,"[unused149]":154,"[unused150]":155,"[unused151]":156,"[unused152]":157,"[unused153]":158,