File size: 4,308 Bytes
ab5f5f1 0232cf1 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 0232cf1 ab5f5f1 0232cf1 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 0232cf1 a1135a9 0232cf1 a1135a9 ab5f5f1 0232cf1 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 0232cf1 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 0232cf1 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 0232cf1 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 0232cf1 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
import pandas as pd
import plotly.express as px
# Columns attached to every plotted point as Plotly custom data and listed,
# in this order, in the hover tooltip. Each column appears exactly once so the
# tooltip never repeats a row.
BETTERTRANSFORMER_DATA = [
    # open llm
    "Model 🤗",
    "Params (B)",
    "Architecture 🏛️",
    "Open LLM Score (%)",
    # deployment settings
    "DType 📥",
    "Backend 🏭",
    "Optimization 🛠️",
    "Quantization 🗜️",
    "Optimization 🛠️ BetterTransformer",
    # primary measurements
    "Prefill (s)",
    "Prefill (s) BetterTransformer",
    "Decode (tokens/s)",
    "Decode (tokens/s) BetterTransformer",
    "End-to-End (tokens/s)",
    "End-to-End (tokens/s) BetterTransformer",
    # speedups
    "Prefill Speedup (%)",
    "Decode Speedup (%)",
]
def get_bt_df(llm_perf_df):
    """Pair float16 baseline runs with their BetterTransformer runs.

    Rows whose optimization is "None" are merged with rows whose optimization
    is "BetterTransformer" (both restricted to float16) on model and
    quantization scheme. Columns coming from the BetterTransformer side that
    clash with baseline columns get a " BetterTransformer" suffix.

    Args:
        llm_perf_df: Benchmark dataframe; it is copied, not modified.

    Returns:
        The merged dataframe with "Prefill Speedup (%)" and
        "Decode Speedup (%)" columns added, keeping only rows where both
        speedups are below 1000.
    """
    runs = llm_perf_df.copy()

    # split the float16 runs into the baseline and the BetterTransformer ones
    optimization = runs["Optimization 🛠️"]
    is_float16 = runs["DType 📥"] == "float16"
    baseline_runs = runs[(optimization == "None") & is_float16]
    optimized_runs = runs[(optimization == "BetterTransformer") & is_float16]

    paired = pd.merge(
        baseline_runs,
        optimized_runs,
        on=["Model 🤗", "Quantization 🗜️"],
        suffixes=["", " BetterTransformer"],
    )

    # prefill is a latency (lower is better): baseline / optimized
    prefill_ratio = paired["Prefill (s)"] / paired["Prefill (s) BetterTransformer"]
    paired["Prefill Speedup (%)"] = (prefill_ratio * 100).round(2) - 100

    # decode is a throughput (higher is better): optimized / baseline
    decode_ratio = paired["Decode (tokens/s) BetterTransformer"] / paired["Decode (tokens/s)"]
    paired["Decode Speedup (%)"] = (decode_ratio * 100).round(2) - 100

    # keep only rows whose two speedups are both below 1000%
    within_bounds = (paired["Prefill Speedup (%)"] < 1000) & (paired["Decode Speedup (%)"] < 1000)
    return paired[within_bounds]
def get_bt_prefill_fig(llm_perf_df):
    """Build the box plot of BetterTransformer prefill speedups.

    Args:
        llm_perf_df: Benchmark dataframe, forwarded to ``get_bt_df``.

    Returns:
        A Plotly Express box figure with architecture on the x axis,
        "Prefill Speedup (%)" on the y axis, colored by quantization scheme,
        with all points drawn.
    """
    speedup_df = get_bt_df(llm_perf_df)

    fig = px.box(
        speedup_df,
        x="Architecture 🏛️",
        y="Prefill Speedup (%)",
        color="Quantization 🗜️",
        points="all",
        custom_data=BETTERTRANSFORMER_DATA,
        color_discrete_sequence=px.colors.qualitative.Light24,
    )

    # one "<b>column:</b> value" row per custom-data column, in list order
    hover_rows = (
        f"<b>{column}:</b> %{{customdata[{i}]}}" for i, column in enumerate(BETTERTRANSFORMER_DATA)
    )
    fig.update_traces(hovertemplate="<br>".join(hover_rows))

    # centered title, axis/legend labels and a fixed canvas size
    centered_title = dict(
        text="Prefill Speedup per Architecture, Compared To Non-Optimized Model",
        y=0.95,
        x=0.5,
        xanchor="center",
        yanchor="top",
    )
    fig.update_layout(
        title=centered_title,
        xaxis_title="LLM Architecture",
        yaxis_title="Prefill Speedup (%)",
        legend_title="Quantization Scheme",
        width=1200,
        height=600,
    )
    return fig
def get_bt_decode_fig(llm_perf_df):
    """Build the box plot of BetterTransformer decode speedups.

    Args:
        llm_perf_df: Benchmark dataframe, forwarded to ``get_bt_df``.

    Returns:
        A Plotly Express box figure with architecture on the x axis,
        "Decode Speedup (%)" on the y axis, colored by quantization scheme,
        with all points drawn.
    """
    speedup_df = get_bt_df(llm_perf_df)

    fig = px.box(
        speedup_df,
        x="Architecture 🏛️",
        y="Decode Speedup (%)",
        color="Quantization 🗜️",
        points="all",
        custom_data=BETTERTRANSFORMER_DATA,
        color_discrete_sequence=px.colors.qualitative.Light24,
    )

    # one "<b>column:</b> value" row per custom-data column, in list order
    hover_rows = (
        f"<b>{column}:</b> %{{customdata[{i}]}}" for i, column in enumerate(BETTERTRANSFORMER_DATA)
    )
    fig.update_traces(hovertemplate="<br>".join(hover_rows))

    # centered title, axis/legend labels and a fixed canvas size
    centered_title = dict(
        text="Decode Speedup per Architecture, Compared To Non-Optimized Model",
        y=0.95,
        x=0.5,
        xanchor="center",
        yanchor="top",
    )
    fig.update_layout(
        title=centered_title,
        xaxis_title="LLM Architecture",
        yaxis_title="Decode Speedup (%)",
        legend_title="Quantization Scheme",
        width=1200,
        height=600,
    )
    return fig
def create_bt_plots(llm_perf_df):
    """Create the hint text and the two BetterTransformer speedup plots.

    Components are instantiated in this order: the HTML hint, the prefill
    plot, then the decode plot.

    Args:
        llm_perf_df: Benchmark dataframe, forwarded to the figure builders.

    Returns:
        A ``(prefill_plot, decode_plot)`` tuple of Gradio Plot components.
    """
    # hint shown alongside the plots
    gr.HTML("👆 Hover over the points 👆 for additional information.", elem_id="text")

    # build both figures before wrapping them in Plot components
    figures = (get_bt_prefill_fig(llm_perf_df), get_bt_decode_fig(llm_perf_df))
    plots = [gr.components.Plot(value=figure, elem_id="plot", show_label=False) for figure in figures]
    return tuple(plots)
|