import streamlit as st
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

# App title
st.set_page_config(page_title="GenAI Chat Bot")
# HF中国镜像站 Credentials
with st.sidebar:
    st.title('GenAI Chat Bot')
    # st.write('This is a generative AI Chat Bot.')

    # Use HF中国镜像站 API Key from secrets or environment
    api_key = os.getenv("llama3")
    if not api_key:
        st.error("API key is missing!")
        st.stop()

    # Authenticate with HF中国镜像站 Hub
    try:
        login(api_key)
        st.success('API successfully authenticated!', icon='✅')
    except Exception as e:
        st.error(f"Authentication failed: {e}")
        st.stop()
    st.subheader('Models and parameters')

    # Model selection categories
    model_options = {
        "Basic": ["meta-llama/Llama-3.2-1B"],
        "Basic-Medium": ["meta-llama/Llama-3.2-1B-Instruct"],
        "Medium-Fine": ["meta-llama/Llama-3.2-3B"],
        "Finest": ["meta-llama/Llama-3.2-3B-Instruct"],
    }

    # Select category (default set to "Basic-Medium")
    selected_category = st.sidebar.selectbox('Select Model Category', list(model_options.keys()), index=1)

    # Set selected model based on category
    selected_model = model_options[selected_category][0]

    # Slider inputs for sampling parameters
    temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=1.0, value=0.3, step=0.01)
    top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.sidebar.slider('max_length', min_value=20, max_value=80, value=65, step=5)
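    # Note: max_length is passed to the model as max_new_tokens below, i.e. the
    # number of tokens generated beyond the prompt, not the total sequence length.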
st.markdown("Disclaimer: The performance and speed of this GenAI tool depends on the machine configuration and model selection") | |
# Store LLM-generated responses
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
# Display chat messages from history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Reset the conversation to the initial greeting
def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
# Load the tokenizer and model, cached so they are not reloaded on every Streamlit rerun
@st.cache_resource
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
    return tokenizer, model

try:
    tokenizer, model = load_model(selected_model)
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.stop()
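# Note: st.cache_resource keys the cache on model_name, so each selected checkpoint is
# loaded once per process; switching categories keeps earlier models resident in memory.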
# Function for generating a response using the HF中国镜像站 model
def generate_huggingface_response(prompt_input):
    inputs = tokenizer(prompt_input, return_tensors="pt").to(model.device)
    try:
        # Generate a continuation of the prompt
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens; otherwise the prompt is echoed back
        response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
        return response
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return "Oops! Something went wrong."
# User-provided prompt
if prompt := st.chat_input(disabled=not api_key):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if the last message is not from the assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_huggingface_response(prompt)
            placeholder = st.empty()
            full_response = ''
            # Simulate streaming by revealing the response one character at a time
            for item in response:
                full_response += item
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
    message = {"role": "assistant", "content": full_response}
    st.session_state.messages.append(message)
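# Suggested requirements.txt for this Space (an assumption; pin versions as needed):
# streamlit
# torch
# transformers
# accelerate  # required for device_map="auto"
# huggingface_hub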