File size: 920 Bytes
e31682d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from typing import Dict, List, Any

from transformers import MBart50TokenizerFast, MBartForConditionalGeneration


class EndpointHandler:
    """Callable request handler that translates text with an mBART model.

    An instance is called with a request payload dict and returns the decoded
    translations. The payload keys read by the handler are ``"inputs"``,
    ``"src_lang"`` and ``"target_lang"`` (note the asymmetric key names).
    """

    def __init__(self, path=""):
        """Load the tokenizer and the translation model.

        Args:
            path: Location handed to
                ``MBartForConditionalGeneration.from_pretrained`` to load the
                model weights.
        """
        # The tokenizer is always loaded from the base many-to-many checkpoint,
        # not from ``path``.
        self.tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
        self.model = MBartForConditionalGeneration.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> List[str]:
        """Translate the request's input text.

        Args:
            data: Request payload. Keys consumed (they are popped, so ``data``
                is mutated):

                * ``"inputs"``: the text to translate; when the key is absent
                  the payload itself is used as the input.
                * ``"src_lang"``: language code of the input text.
                * ``"target_lang"``: language code to translate into; must be
                  a key of the tokenizer's ``lang_code_to_id`` mapping.

        Returns:
            The decoded translations, one string per generated sequence.

        Raises:
            ValueError: If ``"src_lang"`` or ``"target_lang"`` is missing or
                empty.
            KeyError: If ``"target_lang"`` is not a known language code.
        """
        inputs = data.pop("inputs", data)
        source_lang = data.pop("src_lang", None)
        target_lang = data.pop("target_lang", None)

        # Validate before touching the tokenizer: a missing language would
        # otherwise surface as an opaque failure inside the tokenizer or as
        # ``KeyError: None`` from the language-code lookup.
        missing = [
            key
            for key, value in (("src_lang", source_lang), ("target_lang", target_lang))
            if not value
        ]
        if missing:
            raise ValueError(
                f"Missing required field(s) in request payload: {', '.join(missing)}"
            )

        # Resolve the target language first so an unknown code fails before
        # any tokenizer state is changed.
        forced_bos_token_id = self.tokenizer.lang_code_to_id[target_lang]

        self.tokenizer.src_lang = source_lang

        # padding=True lets a list of texts with different lengths be stacked
        # into one tensor; it has no effect on a single string.
        encoded = self.tokenizer(inputs, return_tensors='pt', padding=True)
        generated_tokens = self.model.generate(
            **encoded, forced_bos_token_id=forced_bos_token_id
        )
        return self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)