discourse-ai/tokenizers/all-mpnet-base-v2.json

1 line
455 KiB
JSON
Raw Permalink Normal View History

{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"<pad>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"</s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"<unk>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":104,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":30526,"special":true,"content":"<mask>","single_word":false,"lstrip":true,"rstrip":false,"normalized":false}],"normalizer":{"type":"BertNormalizer","clean_text":true,"handle_chinese_chars":true,"strip_accents":null,"lowercase":true},"pre_tokenizer":{"type":"BertPreTokenizer"},"post_processor":{"type":"RobertaProcessing","sep":["</s>",2],"cls":["<s>",0],"trim_offsets":true,"add_prefix_space":false},"decoder":{"type":"WordPiece","prefix":"##","cleanup":true},"model":{"type":"WordPiece","unk_token":"[UNK]","continuing_subword_prefix":"##","max_input_chars_per_word":100,"vocab":{"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"[PAD]":4,"[unused0]":5,"[unused1]":6,"[unused2]":7,"[unused3]":8,"[unused4]":9,"[unused5]":10,"[unused6]":11,"[unused7]":12,"[unused8]":13,"[unused9]":14,"[unused10]":15,"[unused11]":16,"[unused12]":17,"[unused13]":18,"[unused14]":19,"[unused15]":20,"[unused16]":21,"[unused17]":22,"[unused18]":23,"[unused19]":24,"[unused20]":25,"[unused21]":26,"[unused22]":27,"[unused23]":28,"[unused24]":29,"[unused25]":30,"[unused26]":31,"[unused27]":32,"[unused28]":33,"[unused29]":34,"[unused30]":35,"[unused31]":36,"[unused32]":37,"[unused33]":38,"[unused34]":39,"[unused35]":40,"[unused36]":41,"[unused37]":42,"[unused38]":43,"[unused39]":44,"[unused40]":45,"[unused41]":46,"[unused42]":47,"[unused43]":48,"[unused44]":49,"[unused45]":50,"[unused46]":51,"[unused47]":52,"[unused48]":53,"[unused49]":54,"[unused50]":55,"[unused51]":56,"[unused52]":57,"[unused53]":58,"[unused54]":59,"[unused55]":60,"[unused56]":61,"[unused57]":62,"[unused58]":63,"[unused59]":64,"[unused60]":65,"[unused61]":66,"[unused62]":67,"[unused63]":68,"[unused64]":69,"[unused65]":70,"[unused66]":71,"[unused67]":72,"[unused68]":73,"[unused69]":74,"[unused70]":75,"[unused71]":76,"[unused72]":77,"[unused73]":78,"[unused74]":79,"[unused75]":80,"[unused76]":81,"[unused77]":82,"[unused78]":83,"[unused79]":84,"[unused80]":85,"[unused81]":86,"[unused82]":87,"[unused83]":88,"[unused84]":89,"[unused85]":90,"[unused86]":91,"[unused87]":92,"[unused88]":93,"[unused89]":94,"[unused90]":95,"[unused91]":96,"[unused92]":97,"[unused93]":98,"[unused94]":99,"[unused95]":100,"[unused96]":101,"[unused97]":102,"[unused98]":103,"[UNK]":104,"[CLS]":105,"[SEP]":106,"[MASK]":107,"[unused99]":108,"[unused100]":109,"[unused101]":110,"[unused102]":111,"[unused103]":112,"[unused104]":113,"[unused105]":114,"[unused106]":115,"[unused107]":116,"[unused108]":117,"[unused109]":118,"[unused110]":119,"[unused111]":120,"[unused112]":121,"[unused113]":122,"[unused114]":123,"[unused115]":124,"[unused116]":125,"[unused117]":126,"[unused118]":127,"[unused119]":128,"[unused120]":129,"[unused121]":130,"[unused122]":131,"[unused123]":132,"[unused124]":133,"[unused125]":134,"[unused126]":135,"[unused127]":136,"[unused128]":137,"[unused129]":138,"[unused130]":139,"[unused131]":140,"[unused132]":141,"[unused133]":142,"[unused134]":143,"[unused135]":144,"[unused136]":145,"[unused137]":146,"[unused138]":147,"[unused139]":148,"[unused140]":149,"[unused141]":150,"[unused142]":151,"[unused143]":152,"[unused144]":153,"[unused145]":154,"[unused146]":155,"[unused147]":156,"[unused148]":157,"[unused149]":158,"[unused150]":159,"[unused151]":160,"[unused152]":161,"[unused153]":162,"[unused154]":163,"[unused155]":164,"[unused156]":165,"[unused157]":166,"[unused158]":167,"[unused159]":168,"[unused160]":169,"[unused161]":170,"[unused162]":171,"[unused163]":172,"[unused164]":173,"[unused165]":174,"[unused166]":