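"""GenAI Chat Bot: a Streamlit chat UI around HF中国镜像站 Llama 3.2 checkpoints.

Run locally (assuming this file is saved as app.py) with:

    streamlit run app.py

The HF access token is read from the "llama3" environment variable, e.g. set
as a HF中国镜像站 Spaces secret; the token must have been granted access to
the gated meta-llama checkpoints.
"""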
import streamlit as st
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

# App title
st.set_page_config(page_title="GenAI Chat Bot")

# HF中国镜像站 Credentials
with st.sidebar:
    st.title('GenAI Chat Bot')
    #st.write('This is a generative AI Chat Bot.')
    
    # Use HF中国镜像站 API Key from secrets or environment
    api_key = os.getenv("llama3")

    if not api_key:
        st.error('HF中国镜像站 API token is missing! Set the "llama3" environment variable.')
        st.stop()

    # Authenticate with the HF中国镜像站 Hub; login() stores the token
    # so the gated meta-llama checkpoints can be downloaded
    try:
        login(api_key)
        st.success('API successfully authenticated!', icon='✅')
    except Exception as e:
        st.error(f"Authentication failed: {e}")
        st.stop()

    st.subheader('Models and parameters')
    
    # Model selection categories
    model_options = {
        "Basic": [
            "meta-llama/Llama-3.2-1B"
        ],
        "Basic-Medium": [
            "meta-llama/Llama-3.2-1B-Instruct"
        ],
        "Medium-Fine": [
            "meta-llama/Llama-3.2-3B"
        ],
        "Finest": [
            "meta-llama/Llama-3.2-3B-Instruct"
        ]
    }
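
    # Note: the tiers trade quality for resource use. The base checkpoints
    # (Llama-3.2-1B/3B) are plain completion models, while the -Instruct
    # variants are fine-tuned to follow conversational prompts, so "Finest"
    # (Llama-3.2-3B-Instruct) generally produces the most chat-like replies.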

    # Select category (defaults to "Basic-Medium"); we are already inside the
    # sidebar context manager, so st.selectbox is enough
    selected_category = st.selectbox('Select Model Category', list(model_options.keys()), index=1)

    # Set selected model based on category
    selected_model = model_options[selected_category][0]

    # Slider inputs for the sampling parameters (st.slider, since we are
    # already inside the sidebar context)
    temperature = st.slider('temperature', min_value=0.01, max_value=1.0, value=0.3, step=0.01)
    top_p = st.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.slider('max_length', min_value=20, max_value=80, value=65, step=5)
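    # temperature rescales the logits before sampling (lower = more
    # deterministic), top_p keeps only the smallest set of tokens whose
    # cumulative probability exceeds p (nucleus sampling), and max_length
    # caps the number of *new* tokens per reply (it is passed to generate()
    # as max_new_tokens below)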
    
    
    st.markdown("Disclaimer: the performance and speed of this GenAI tool depend on the machine configuration and the selected model.")
    

# Store LLM-generated responses across Streamlit reruns
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

# Display the chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Reset the conversation to the initial greeting
def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

# Load the tokenizer and model once per model name; st.cache_resource keeps
# the multi-gigabyte weights in memory across Streamlit reruns instead of
# reloading them on every interaction
@st.cache_resource
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
    return tokenizer, model

try:
    tokenizer, model = load_model(selected_model)
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.stop()

# Function for generating a response with the local HF中国镜像站 model
def generate_huggingface_response(prompt_input):
    inputs = tokenizer(prompt_input, return_tensors="pt").to(model.device)
    try:
        # Generate a response; **inputs forwards the attention mask alongside
        # input_ids, and Llama has no pad token, so the EOS token is reused
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens so the reply does not echo
        # the prompt (generate() returns prompt + completion)
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return response
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return "Oops! Something went wrong."

# User-provided prompt
if prompt := st.chat_input(disabled=not api_key):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if the last message is not from the assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # Read the prompt from session state rather than the local
            # `prompt` variable so this block is also safe on reruns where
            # no new prompt was submitted
            response = generate_huggingface_response(st.session_state.messages[-1]["content"])
            placeholder = st.empty()
            # generate() is blocking, so the reply arrives as one string; the
            # loop replays it character by character to mimic streaming
            full_response = ''
            for item in response:
                full_response += item
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
    message = {"role": "assistant", "content": full_response}
    st.session_state.messages.append(message)
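
# Possible extension (a sketch, not wired in): real token streaming instead of
# the replayed string above, using transformers' TextIteratorStreamer. The
# model runs in a background thread while the main thread consumes tokens:
#
#   from threading import Thread
#   from transformers import TextIteratorStreamer
#
#   streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
#   thread = Thread(target=model.generate,
#                   kwargs=dict(**inputs, max_new_tokens=max_length, streamer=streamer))
#   thread.start()
#   for token_text in streamer:
#       full_response += token_text
#       placeholder.markdown(full_response)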