import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces

# Define the model path
model_path = "mrcuddle/Ministral-Instruct-2410-8B-DPO-RP"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype="auto"
).eval()


# Define the chatbot function (runs on GPU via ZeroGPU when hosted on Spaces)
@spaces.GPU
def chatbot(message, history):
    # Rebuild the full conversation so the model sees prior turns,
    # not just the latest message
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Tokenize the input using the model's chat template
    input_ids = tokenizer.apply_chat_template(
        conversation=messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    )

    # Generate the response; without max_new_tokens, generate() may fall
    # back to a very short default length
    output_ids = model.generate(
        input_ids.to(model.device),
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id
    )
    response = tokenizer.decode(
        output_ids[0][input_ids.shape[1]:],
        skip_special_tokens=True
    )

    # Append the new exchange to the history and clear the textbox
    history.append((message, response))
    return history, ""


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Chatbot using Transformers")

    # Create a Chatbot component
    chatbot_component = gr.Chatbot([], elem_id="chatbot")

    with gr.Row():
        # Create a Textbox component for user input
        txt = gr.Textbox(
            show_label=False,
            placeholder="Type your message here...",
        )

    # Attach the chatbot function to the Textbox component
    txt.submit(chatbot, [txt, chatbot_component], [chatbot_component, txt])

# Launch the Gradio interface
demo.launch()
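
# --- Optional local smoke test (a minimal sketch, not part of the app) ---
# This assumes the `spaces` decorator is a no-op outside Hugging Face Spaces
# and that a CUDA-capable machine (or CPU fallback) is available. Uncomment
# to exercise the chatbot function directly, without the Gradio UI; note
# that demo.launch() above blocks, so comment it out first if testing here.
#
# history, _ = chatbot("Hello! Who are you?", [])
# print(history[-1][1])  # print the model's reply from the last exchange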