|
import os |
|
import gradio as gr |
|
import pandas as pd |
|
from datetime import datetime |
|
import time |
|
import sys |
|
import importlib.util |
|
import threading |
|
from log_reader import RemoteLogReader |
|
|
|
|
|
# Path of the helper script, resolved relative to the directory of this file.
azure_count_ip_data_path = os.path.join(
    os.path.dirname(__file__),
    'azure_count_ip_data.py',
)

# Load the helper directly from that path: build an import spec, create the
# module object from it, then execute the module's code.
spec = importlib.util.spec_from_file_location(
    "azure_count_ip_data",
    azure_count_ip_data_path,
)
azure_count_ip_data = importlib.util.module_from_spec(spec)
spec.loader.exec_module(azure_count_ip_data)

# Expose the two counting helpers under module-level names.
count_files_per_annotator = azure_count_ip_data.count_files_per_annotator
count_deduplicated_files_per_annotator = (
    azure_count_ip_data.count_deduplicated_files_per_annotator
)
|
|
|
|
|
# Persisted files live in a "data" directory beside this script.
DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')

# Create the directory at import time; exist_ok makes this a no-op when it
# is already present.
os.makedirs(DATA_DIR, exist_ok=True)

# CSV snapshot of the statistics table, and the text file holding the
# timestamp of its last refresh.
STATS_FILE, LAST_UPDATE_FILE = (
    os.path.join(DATA_DIR, file_name)
    for file_name in ('battle_stats.csv', 'last_update.txt')
)
|
|
|
def save_stats(df, current_time):
    """Persist the statistics table and its refresh timestamp.

    The table is written to ``STATS_FILE`` as CSV without the index column,
    then ``current_time`` is written verbatim to ``LAST_UPDATE_FILE``. Both
    files are overwritten.

    Args:
        df: Table to store; must provide ``to_csv``.
        current_time: Text to record as the last-update marker.
    """
    df.to_csv(STATS_FILE, index=False)

    with open(LAST_UPDATE_FILE, 'w') as timestamp_file:
        timestamp_file.write(current_time)
|
|
|
def load_stats():
    """Read back the persisted statistics table and refresh timestamp.

    Returns:
        A ``(DataFrame, str)`` pair: the table parsed from ``STATS_FILE`` and
        the stripped contents of ``LAST_UPDATE_FILE``. If a file is missing
        or the CSV contains no data, an empty table with the four expected
        columns and an empty string are returned instead.
    """
    try:
        stats_frame = pd.read_csv(STATS_FILE)
        with open(LAST_UPDATE_FILE, 'r') as timestamp_file:
            timestamp = timestamp_file.read().strip()
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Nothing usable on disk yet: hand back an empty, correctly shaped table.
        placeholder_columns = ['Annotator', 'Total Count', 'Unique Count', 'Unique %']
        return pd.DataFrame(columns=placeholder_columns), ""
    else:
        return stats_frame, timestamp
|
|
|
_STATS_COLUMNS = ['Annotator', 'Total Count', 'Unique Count', 'Unique %']


def _format_unique_percentage(unique, total):
    """Return ``unique / total`` as a percentage string rounded to one decimal."""
    # A zero total yields the integer 0, so the text is "0%" rather than "0.0%".
    percentage = round((unique / total * 100) if total > 0 else 0, 1)
    return f"{percentage}%"


def _build_stats_frame(total_counts, unique_counts):
    """Combine per-annotator counts into the table shown in the UI."""
    annotators = set(total_counts) | set(unique_counts)

    # Order the annotator rows before the summary row exists. This keeps an
    # annotator whose name happens to be 'TOTAL' among the regular rows, and
    # the name tie-break makes the order of equal counts reproducible.
    ordered_annotators = sorted(
        annotators,
        key=lambda name: (-total_counts.get(name, 0), str(name)),
    )

    rows = []
    total_sum = 0
    unique_sum = 0
    for annotator in ordered_annotators:
        total = total_counts.get(annotator, 0)
        unique = unique_counts.get(annotator, 0)
        total_sum += total
        unique_sum += unique
        rows.append({
            'Annotator': annotator,
            'Total Count': total,
            'Unique Count': unique,
            'Unique %': _format_unique_percentage(unique, total),
        })

    # Summary row always comes last.
    rows.append({
        'Annotator': 'TOTAL',
        'Total Count': total_sum,
        'Unique Count': unique_sum,
        'Unique %': _format_unique_percentage(unique_sum, total_sum),
    })

    return pd.DataFrame(rows, columns=_STATS_COLUMNS)


def _last_known_stats():
    """Return the most recently persisted stats, or an empty table if none."""
    try:
        return load_stats()
    except Exception as e:
        print(f"Error loading cached stats: {e}")
        return pd.DataFrame(columns=_STATS_COLUMNS), ""


def update_stats():
    """Get the latest battle statistics with both total and deduplicated counts.

    Creates a ``RemoteLogReader``, asks ``count_files_per_annotator`` and
    ``count_deduplicated_files_per_annotator`` for per-annotator counts, and
    builds a table with one row per annotator (sorted by total count,
    descending) followed by a ``TOTAL`` summary row. The result is persisted
    with ``save_stats``.

    Returns:
        A ``(DataFrame, str)`` pair: the table and the refresh time formatted
        as ``YYYY-MM-DD HH:MM:SS``. If collecting the counts fails, the error
        is printed and the last persisted table and timestamp are returned so
        that a transient failure does not blank the display; when nothing has
        been persisted, that is an empty table and an empty string.
    """
    try:
        reader = RemoteLogReader()
        total_counts = count_files_per_annotator(reader)
        unique_counts = count_deduplicated_files_per_annotator(reader)
        df = _build_stats_frame(total_counts, unique_counts)
    except Exception as e:
        print(f"Error updating stats: {e}")
        return _last_known_stats()

    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Persisting is best-effort: a write failure must not throw away the
    # statistics that were just computed.
    try:
        save_stats(df, current_time)
    except Exception as e:
        print(f"Error saving stats: {e}")

    return df, current_time
|
|
|
def auto_update(state):
    """Refresh the shared statistics once an hour while ``state['running']``.

    Each pass stores the result of ``update_stats()`` in ``state['stats']``
    and ``state['last_update']`` and then sleeps. The running flag is only
    examined at the start of a pass, so clearing it takes effect after the
    current sleep finishes.

    Args:
        state: Mutable mapping holding the ``'running'`` flag and receiving
            the refreshed ``'stats'`` and ``'last_update'`` values.
    """
    refresh_interval_seconds = 3600

    while True:
        if not state['running']:
            break

        stats, refreshed_at = update_stats()
        state['stats'] = stats
        state['last_update'] = refreshed_at

        time.sleep(refresh_interval_seconds)
|
|
|
def create_ui():
    """Build the Gradio app that displays per-annotator battle counts.

    Loads the persisted statistics (fetching fresh ones when the stored
    table is empty), starts a daemon thread running ``auto_update`` on the
    shared state, and wires up a read-only timestamp box, a read-only table
    and a "Refresh Now" button.

    Returns:
        The constructed ``gr.Blocks`` application; it is not launched here.
    """
    # Start from what was saved previously; only compute synchronously when
    # the saved table has no rows.
    stats, refreshed_at = load_stats()
    if stats.empty:
        stats, refreshed_at = update_stats()

    # Shared between the UI callbacks below and the background refresher.
    state = {'running': True, 'stats': stats, 'last_update': refreshed_at}

    # Daemon thread, so it does not keep the process alive on exit.
    updater = threading.Thread(target=auto_update, args=(state,), daemon=True)
    updater.start()

    def get_current_stats():
        return state['stats']

    def get_last_update():
        return state['last_update']

    def manual_refresh():
        fresh_stats, fresh_timestamp = update_stats()
        state['stats'] = fresh_stats
        state['last_update'] = fresh_timestamp
        return state['stats'], state['last_update']

    with gr.Blocks(title="Battle Count Statistics") as app:
        gr.Markdown("# Battle Count Statistics")
        gr.Markdown("Displays the count of battles per annotator. 'Total Count' shows all valid battles, while 'Unique Count' shows deduplicated battles based on the first user prompt. Only conversations that pass the vote conditions are counted, and example prompts are excluded from the unique count.")

        with gr.Row():
            # Callables are passed as values so the components read the
            # shared state rather than a snapshot taken at build time.
            last_update_box = gr.Textbox(
                value=get_last_update,
                label="Last Updated",
                interactive=False,
            )

        with gr.Row():
            stats_table = gr.DataFrame(
                value=get_current_stats,
                interactive=False,
                wrap=True,
                column_widths=["40%", "20%", "20%", "20%"],
                height=600,
            )

        refresh_btn = gr.Button("Refresh Now")
        refresh_btn.click(
            fn=manual_refresh,
            outputs=[stats_table, last_update_box],
        )

    return app
|
|
|
if __name__ == "__main__":
    # Script entry point: build the interface, then serve it with Gradio's
    # default launch settings.
    app = create_ui()
    app.launch()