import streamlit as st
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
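# Dependencies (from the imports above): streamlit, torch, transformers,
# huggingface_hub; note that device_map="auto" below also requires the
# accelerate package to be installed.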
# App title
st.set_page_config(page_title="GenAI Chat Bot")
# HF中国镜像站 Credentials
with st.sidebar:
    st.title('GenAI Chat Bot')
    #st.write('This is a generative AI Chat Bot.')

    # Use HF中国镜像站 API key from secrets or environment
    api_key = os.getenv("llama3")
    if not api_key:
        st.error("API key is missing!")
        st.stop()

    # Authenticate with HF中国镜像站 Hub
    try:
        login(api_key)
        st.success('API successfully authenticated!', icon='✅')
    except Exception as e:
        st.error(f"Authentication failed: {e}")
        st.stop()

    st.subheader('Models and parameters')

    # Model selection categories
    model_options = {
        "Basic": ["meta-llama/Llama-3.2-1B"],
        "Basic-Medium": ["meta-llama/Llama-3.2-1B-Instruct"],
        "Medium-Fine": ["meta-llama/Llama-3.2-3B"],
        "Finest": ["meta-llama/Llama-3.2-3B-Instruct"],
    }

    # Select category (default set to "Basic-Medium")
    selected_category = st.sidebar.selectbox('Select Model Category', ["Basic", "Basic-Medium", "Medium-Fine", "Finest"], index=1)

    # Set selected model based on category
    selected_model = model_options[selected_category][0]

    # Slider inputs for generation parameters
    temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=1.0, value=0.3, step=0.01)
    top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.sidebar.slider('max_length', min_value=20, max_value=80, value=65, step=5)
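    # Lower temperature sharpens the token distribution (more deterministic
    # output); top_p keeps only the smallest set of tokens whose cumulative
    # probability exceeds the threshold (nucleus sampling). max_length is
    # passed to generate() as max_new_tokens, so it caps the number of
    # generated tokens, not the total sequence length.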
    st.markdown("Disclaimer: The performance and speed of this GenAI tool depend on the machine configuration and model selection.")
# Store LLM generated responses
if "messages" not in st.session_state.keys():
st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
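# st.session_state persists across Streamlit reruns, so the chat history
# survives each widget interaction within a browser session.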
# Display or clear chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
# Load the tokenizer and model
try:
    tokenizer = AutoTokenizer.from_pretrained(selected_model)
    model = AutoModelForCausalLM.from_pretrained(selected_model, torch_dtype=torch.bfloat16, device_map="auto")
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.stop()
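# NOTE: as written, the model reloads on every Streamlit rerun. A minimal
# sketch of a cached loader, assuming Streamlit's st.cache_resource is
# available (Streamlit >= 1.18):
#
#   @st.cache_resource
#   def load_model(name):
#       tok = AutoTokenizer.from_pretrained(name)
#       mdl = AutoModelForCausalLM.from_pretrained(
#           name, torch_dtype=torch.bfloat16, device_map="auto")
#       return tok, mdl
#
#   tokenizer, model = load_model(selected_model)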
# Function for generating response using HF中国镜像站 model
def generate_huggingface_response(prompt_input):
    inputs = tokenizer(prompt_input, return_tensors="pt").to(model.device)
    try:
        # Generate a response from the model (pass the attention mask along
        # with the input ids to avoid padding ambiguity)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
            )
        # Decode only the newly generated tokens; decoding outputs[0] whole
        # would echo the prompt back into the reply
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True)
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return "Oops! Something went wrong."
# User-provided prompt
if prompt := st.chat_input(disabled=not api_key):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if the last message is not from the assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_huggingface_response(prompt)
            placeholder = st.empty()
            full_response = ''
            # Reveal the response character by character to simulate streaming
            for item in response:
                full_response += item
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
    message = {"role": "assistant", "content": full_response}
    st.session_state.messages.append(message)
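# To run locally (assuming this file is saved as app.py, the HF中国镜像站
# Spaces convention): streamlit run app.py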