File size: 920 Bytes
e31682d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from typing import Dict, List, Any
from transformers import MBart50TokenizerFast, MBartForConditionalGeneration
class EndpointHandler:
    """Callable translation handler built on an mBART-50 tokenizer and model.

    The tokenizer is always loaded from the
    ``facebook/mbart-large-50-many-to-many-mmt`` checkpoint, while the model
    weights are loaded from ``path``.
    """

    def __init__(self, path=""):
        """Load the tokenizer and the model.

        Args:
            path: Location handed to
                ``MBartForConditionalGeneration.from_pretrained``.
        """
        self.tokenizer = MBart50TokenizerFast.from_pretrained(
            "facebook/mbart-large-50-many-to-many-mmt"
        )
        self.model = MBartForConditionalGeneration.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> List[str]:
        """Translate ``data["inputs"]`` from ``src_lang`` into ``target_lang``.

        Args:
            data: Request payload. The keys ``"inputs"``, ``"src_lang"`` and
                ``"target_lang"`` are popped from it, so the dict is mutated
                in place. If ``"inputs"`` is absent, the payload dict itself
                is handed to the tokenizer. Both language values must be keys
                of the tokenizer's ``lang_code_to_id`` mapping.

        Returns:
            The strings produced by ``tokenizer.batch_decode`` for the
            generated sequences, with special tokens skipped.

        Raises:
            ValueError: If ``src_lang`` or ``target_lang`` is missing or is
                not a language code known to the tokenizer.
        """
        inputs = data.pop("inputs", data)
        source_lang = data.pop("src_lang", None)
        target_lang = data.pop("target_lang", None)

        # Validate both codes before touching the shared tokenizer so a bad
        # request fails fast with a readable message instead of a bare
        # KeyError(None) raised after tokenization.
        lang_ids = self.tokenizer.lang_code_to_id
        for key, code in (("src_lang", source_lang), ("target_lang", target_lang)):
            if not isinstance(code, str) or code not in lang_ids:
                raise ValueError(
                    f"'{key}' must be a language code supported by the "
                    f"tokenizer, got {code!r}"
                )

        self.tokenizer.src_lang = source_lang
        encoded = self.tokenizer(inputs, return_tensors="pt")
        generated_tokens = self.model.generate(
            **encoded, forced_bos_token_id=lang_ids[target_lang]
        )
        return self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|