{ | |
"version": "1.0", | |
"truncation": null, | |
"padding": null, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "UNK", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "PAD", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 2, | |
"content": "WORD_BOUNDARY", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 3, | |
"content": "UTT_BOUNDARY", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
} | |
], | |
"normalizer": { | |
"type": "Sequence", | |
"normalizers": [ | |
{ | |
"type": "Strip", | |
"strip_left": true, | |
"strip_right": true | |
} | |
] | |
}, | |
"pre_tokenizer": { | |
"type": "WhitespaceSplit" | |
}, | |
"post_processor": { | |
"type": "TemplateProcessing", | |
"single": [ | |
{ | |
"SpecialToken": { | |
"id": "UTT_BOUNDARY", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "A", | |
"type_id": 0 | |
} | |
} | |
], | |
"pair": [ | |
{ | |
"SpecialToken": { | |
"id": "UTT_BOUNDARY", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "A", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"SpecialToken": { | |
"id": "UTT_BOUNDARY", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "B", | |
"type_id": 1 | |
} | |
} | |
], | |
"special_tokens": { | |
"UTT_BOUNDARY": { | |
"id": "UTT_BOUNDARY", | |
"ids": [ | |
3 | |
], | |
"tokens": [ | |
"UTT_BOUNDARY" | |
] | |
} | |
} | |
}, | |
"decoder": null, | |
"model": { | |
"type": "WordLevel", | |
"vocab": { | |
"UNK": 0, | |
"PAD": 1, | |
"WORD_BOUNDARY": 2, | |
"UTT_BOUNDARY": 3, | |
"a˧˥": 4, | |
"u˧˥": 5, | |
"a˥": 6, | |
"au": 7, | |
"n": 8, | |
"a˥˩": 9, | |
"ʃ̺": 10, | |
"ɻ̩˥˩": 11, | |
"ə˧˥": 12, | |
"m": 13, | |
"ɤ": 14, | |
"p": 15, | |
"j": 16, | |
"e˧˥": 17, | |
"kʰ": 18, | |
"k": 19, | |
"ɤ˥˩": 20, | |
"w": 21, | |
"o˥": 22, | |
"t̠ʃ̺ʰ": 23, | |
"ə˥": 24, | |
"ŋ": 25, | |
"t": 26, | |
"ʊ˥": 27, | |
"ɕ": 28, | |
"i": 29, | |
"a": 30, | |
"l": 31, | |
"au˧˩˧": 32, | |
"x": 33, | |
"u˧˩˧": 34, | |
"i˥": 35, | |
"ei˧˩˧": 36, | |
"pʰ": 37, | |
"i˧˥": 38, | |
"ai˧˥": 39, | |
"ou˧˩˧": 40, | |
"ɤ˧˥": 41, | |
"o˧˩˧": 42, | |
"tɕ": 43, | |
"au˥˩": 44, | |
"ts": 45, | |
"ə˧˩˧": 46, | |
"ɤ˥": 47, | |
"ei˧˥": 48, | |
"ʊ˧˥": 49, | |
"i˧˩˧": 50, | |
"t̠ʃ̺": 51, | |
"ɻ̩˧˩˧": 52, | |
"ei˥˩": 53, | |
"s": 54, | |
"u˥˩": 55, | |
"ɹ̪̩": 56, | |
"ai˥": 57, | |
"u˥": 58, | |
"tɕʰ": 59, | |
"a˧˩˧": 60, | |
"ai˥˩": 61, | |
"ɛ˥˩": 62, | |
"f": 63, | |
"i˥˩": 64, | |
"y˥˩": 65, | |
"au˧˥": 66, | |
"ɻ": 67, | |
"ou˥˩": 68, | |
"e˥": 69, | |
"tʰ": 70, | |
"ɹ̪̩˥˩": 71, | |
"ɛ˧˥": 72, | |
"au˥": 73, | |
"ou˧˥": 74, | |
"e˧˩˧": 75, | |
"ɛ˥": 76, | |
"ɻ̩˥": 77, | |
"ɥ": 78, | |
"ɹ̪̩˧˩˧": 79, | |
"ai˧˩˧": 80, | |
"ou˥": 81, | |
"o˥˩": 82, | |
"ɛ˧˩˧": 83, | |
"ʊ˧˩˧": 84, | |
"ɔ˥": 85, | |
"tsʰ": 86, | |
"ei": 87, | |
"ə˥˩": 88, | |
"o": 89, | |
"ʊ˥˩": 90, | |
"ou": 91, | |
"ɤ˧˩˧": 92, | |
"o˧˥": 93, | |
"ei˥": 94, | |
"e˥˩": 95, | |
"ɚ˧˩˧": 96, | |
"y˥": 97, | |
"ɚ˥˩": 98, | |
"y˧˥": 99, | |
"ɻ̩": 100, | |
"y˧˩˧": 101, | |
"ɹ̪̩˥": 102, | |
"ɻ̩˧˥": 103, | |
"u": 104, | |
"ə": 105, | |
"ai": 106, | |
"ʊ": 107, | |
"e": 108, | |
"ɚ˧˥": 109, | |
"ɔ˥˩": 110, | |
"ɹ̪̩˧˥": 111, | |
"ɛ": 112, | |
"y": 113, | |
"m˧˥": 114 | |
}, | |
"unk_token": "UNK" | |
} | |
} |