"""Gradio chat demo for the fnlp/moss-moon-003-sft-plugin conversational model."""

import gradio as gr
from transformers import AutoModel, AutoTokenizer

MODEL_NAME = "fnlp/moss-moon-003-sft-plugin"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Half precision on GPU; eval mode disables dropout for inference.
model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True).half().cuda()
model = model.eval()


def predict(input, history=None):
    """Generate the model's next reply for *input* given the chat *history*.

    Parameters
    ----------
    input : str
        The user's new message.
    history : list[tuple[str, str]] | None
        Prior (user, assistant) turns; None is treated as an empty chat.

    Returns
    -------
    tuple[list, list]
        The updated history twice, so Gradio can refresh both the Chatbot
        widget and the State component from one callback.
    """
    if history is None:
        history = []

    # Rebuild the full MOSS-format conversation prompt from the history.
    prompt = ""
    for q, r in history:
        prompt += "<|Human|>: " + q + "\n"
        prompt += "<|MOSS|>: " + r + "\n"
    prompt += "<|Human|>: " + input + "\n<|MOSS|>:"

    inputs = tokenizer(prompt, return_tensors="pt")
    # BUG FIX: the model lives on the GPU but the tokenized tensors were left
    # on the CPU, so generate() would fail with a device mismatch. Move the
    # whole BatchEncoding to the model's device.
    inputs = inputs.to(model.device)

    outputs = model.generate(
        **inputs,
        do_sample=True,
        temperature=0.7,
        top_p=0.8,
        repetition_penalty=1.1,
        max_new_tokens=512,
    )

    # BUG FIX: the original sliced the decoded string with a magic character
    # offset (len(prompt) + 2), which breaks whenever decoding inserts special
    # tokens or retokenization shifts offsets. Decoding only the newly
    # generated token ids is offset-free and robust.
    prompt_len = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(outputs[0][prompt_len:])

    history = history + [(input, completion)]
    return history, history


with gr.Blocks() as demo:
    gr.Markdown("moss-moon-003-sft-plugin")
    state = gr.State([])
    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=400)
    with gr.Row():
        with gr.Column(scale=4):
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
            ).style(container=False)
        with gr.Column(scale=1):
            button = gr.Button("Generate")
    # Both pressing Enter in the textbox and clicking the button run predict.
    txt.submit(predict, [txt, state], [chatbot, state])
    button.click(predict, [txt, state], [chatbot, state])

demo.queue().launch()