"""
Launch a demo for the LANGTOK model.

Created 8.11.2024 by emmabedna.
It uses the gradio library to launch a model demo.
"""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

# Hugging Face model id shared by the tokenizer and the model.
_MODEL_NAME = "emmabedna/bert-base-multilingual-cased-finetuned-langtok"

# Loading the tokenizer and the model from huggingface.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(_MODEL_NAME)

# Maps the class index predicted by the model to a language code.
# -100 is kept from the original mapping; argmax over the logits never
# yields it, so it is not used by langtok() below.
int_to_label = {
    0: "arb", 1: "ces", 2: "dan", 3: "deu", 4: "eng",
    5: "fra", 6: "hat", 7: "ita", 8: "jpn", 9: "lin", 10: "nld", 11: "pol",
    12: "por", 13: "rus", 14: "slk", 15: "spa", 16: "swe", 17: "ukr",
    -100: "xxx",
}


def langtok(input: str) -> list:
    """Label every token of *input* with its predicted language.

    The text is tokenized, passed through the model, and the highest-scoring
    class of each token is looked up in ``int_to_label``.

    Args:
        input: The text to analyse. The parameter name (which shadows the
            builtin) is kept so existing callers are not affected.

    Returns:
        A list of tuples in the format expected by gradio's "highlight"
        output: ``(token, label)`` for each token, each followed by a
        ``(" ", None)`` separator. The ``[CLS]`` and ``[SEP]`` marker tokens
        are left out.
    """
    # truncation=True clips over-long text to the tokenizer's maximum length
    # instead of sending a sequence the model cannot handle.
    tokenized_input = tokenizer(input, return_tensors="pt", truncation=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**tokenized_input).logits

    # Best class per token for the first (only) sequence in the batch.
    label_ids = torch.argmax(logits, dim=2)[0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(tokenized_input["input_ids"][0])

    # Creating a list of tuples that holds the information about how each
    # token should be highlighted.
    highlighted_tokens = []
    for token, label_id in zip(tokens, label_ids):
        # [CLS] and [SEP] tokens that mark the beginning and end of a
        # sentence are not shown in the demo.
        if token in ("[CLS]", "[SEP]"):
            continue
        # Each token is followed by a white space.
        highlighted_tokens.extend(
            [(token, int_to_label[label_id]), (" ", None)]
        )
    return highlighted_tokens


demo = gr.Interface(
    langtok,
    gr.Textbox(
        placeholder="Please enter a sentence. \nEach word will be marked with the language it belongs to, based on the analysis by our system."
    ),
    "highlight",
    examples=[
        ["I feel like heute wird es regen, ale nám to nevadí."],
        ["Přes řeku stál vysoký most, most of it was wooden."],
        ["Mamma mia, this spaghetti is assolutamente delizioso!"],
        ["Excusez moi, kde jsou tu les toilettes?"],
        ["Por favor, can you tell me, kde je nejbližší supermercado?"],
        ["Välkommen till IKEA, after getting the furniture for our petit appartement we can go to the restaurant and have some köttbullar."],
    ],
)

demo.launch(share=True)