import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

"""
    THIS CODE LAUNCHES A DEMO FOR THE LANGTOK MODEL
    created 8.11.2024 by emmabedna

    It uses the Gradio library to launch a model demo.
"""


# Hugging Face checkpoint id shared by the tokenizer and the model
_CHECKPOINT = "emmabedna/bert-base-multilingual-cased-finetuned-langtok"

# load the tokenizer and the token-classification model for that checkpoint
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(_CHECKPOINT)


# Maps a class id to the three-letter language code shown in the demo
# (the codes look like ISO 639-3 -- TODO confirm).
int_to_label = {
    0: "arb",
    1: "ces",
    2: "dan",
    3: "deu",
    4: "eng",
    5: "fra",
    6: "hat",
    7: "ita",
    8: "jpn",
    9: "lin",
    10: "nld",
    11: "pol",
    12: "por",
    13: "rus",
    14: "slk",
    15: "spa",
    16: "swe",
    17: "ukr",
    # NOTE(review): -100 is presumably the "ignore" label id used for masked
    # positions during training -- confirm.
    -100: "xxx",
}

def langtok(input: str):
    """Label every token of ``input`` with the language predicted for it.

    The text is tokenized with the module-level ``tokenizer``, passed through
    the module-level ``model`` without gradient tracking, and the
    highest-scoring class of each token is turned into a label through
    ``int_to_label``.

    Args:
        input: The text to analyse. The name shadows the builtin, but it is
            kept so that callers passing it by keyword keep working.

    Returns:
        A list of ``(text, label)`` tuples. Every token is paired with its
        language label and followed by ``(" ", None)``, an unlabelled white
        space. The ``[CLS]`` and ``[SEP]`` markers are left out.
    """
    # truncation=True asks the tokenizer to cap the sequence at its configured
    # maximum length, so an overly long input is shortened instead of being
    # handed to the model at full length (BERT-style models only accept a
    # limited number of positions).
    encoded = tokenizer(input, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**encoded).logits

    # index 0 selects the only sequence in the batch: one class id per token
    label_ids = torch.argmax(logits, dim=2)[0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(encoded["input_ids"][0])

    # list of tuples that tells the demo how each token should be highlighted
    highlighted_tokens = []
    for token, label_id in zip(tokens, label_ids):
        # [CLS] and [SEP] mark the beginning and end of the input and are not
        # shown in the demo
        if token in ("[CLS]", "[SEP]"):
            continue

        # each token is followed by an unlabelled white space
        highlighted_tokens.append((token, int_to_label[label_id]))
        highlighted_tokens.append((" ", None))

    return highlighted_tokens

# code-switched sentences offered as ready-made examples in the demo
_EXAMPLE_SENTENCES = (
    "I feel like heute wird es regen, ale nám to nevadí.",
    "Přes řeku stál vysoký most, most of it was wooden.",
    "Mamma mia, this spaghetti is assolutamente delizioso!",
    "Excusez moi, kde jsou tu les toilettes?",
    "Por favor, can you tell me, kde je nejbližší supermercado?",
    "Välkommen till IKEA, after getting the furniture for our petit appartement we can go to the restaurant and have some köttbullar.",
)

# the interface feeds the text box content to langtok and renders the returned
# (token, label) tuples through the "highlight" output
demo = gr.Interface(
    fn=langtok,
    inputs=gr.Textbox(
        placeholder="Please enter a sentence. Each word will be marked with the language it belongs to, based on the analysis by our system."
    ),
    outputs="highlight",
    # every example is a one-element list: a single value for the single input
    examples=[[sentence] for sentence in _EXAMPLE_SENTENCES],
)

# start the demo as soon as the script runs; share=True is forwarded to launch()
demo.launch(share=True)