Spaces:

bigcomputer
/

arena-annotation-progress

Running

Terry Zhuo

update

7168226 8 days ago

6.05 kB

	import os
	import gradio as gr
	import pandas as pd
	from datetime import datetime
	import time
	import sys
	import importlib.util
	import threading
	from log_reader import RemoteLogReader

	# The counting helpers live in azure_count_ip_data.py, next to this file.
	azure_count_ip_data_path = os.path.join(
	    os.path.dirname(__file__),
	    'azure_count_ip_data.py',
	)

	# Load that script as a module directly from its file path instead of
	# relying on it being importable through sys.path.
	spec = importlib.util.spec_from_file_location(
	    "azure_count_ip_data",
	    azure_count_ip_data_path,
	)
	azure_count_ip_data = importlib.util.module_from_spec(spec)
	spec.loader.exec_module(azure_count_ip_data)

	# Bind the two counting functions used by update_stats() to module-level names.
	count_files_per_annotator = getattr(
	    azure_count_ip_data, "count_files_per_annotator"
	)
	count_deduplicated_files_per_annotator = getattr(
	    azure_count_ip_data, "count_deduplicated_files_per_annotator"
	)

	# Persisted artifacts (stats table + timestamp) are kept in a "data" folder
	# located beside this file.
	DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
	STATS_FILE, LAST_UPDATE_FILE = (
	    os.path.join(DATA_DIR, filename)
	    for filename in ('battle_stats.csv', 'last_update.txt')
	)

	# Create the folder at import time; a pre-existing folder is not an error.
	os.makedirs(DATA_DIR, exist_ok=True)

	def save_stats(df, current_time):
	    """Persist the stats table and its generation time.

	    Writes ``df`` to STATS_FILE as CSV (without the index column) and then
	    overwrites LAST_UPDATE_FILE with the ``current_time`` string.
	    """
	    # Table first, timestamp second: if the CSV write raises, the old
	    # timestamp file is left as it was.
	    df.to_csv(STATS_FILE, index=False)

	    with open(LAST_UPDATE_FILE, mode='w') as stamp_file:
	        stamp_file.write(current_time)

	def load_stats():
	    """Read back the stats table and timestamp written by a previous run.

	    Returns a ``(DataFrame, last_update_string)`` pair. When either file is
	    missing, or the CSV is empty, an empty table with the expected columns
	    and an empty timestamp string are returned instead.
	    """
	    try:
	        cached_table = pd.read_csv(STATS_FILE)
	        with open(LAST_UPDATE_FILE, 'r') as stamp_file:
	            stamp = stamp_file.read().strip()
	    except (FileNotFoundError, pd.errors.EmptyDataError):
	        # Nothing usable on disk yet: hand back an empty placeholder.
	        placeholder = pd.DataFrame(
	            columns=['Annotator', 'Total Count', 'Unique Count', 'Unique %']
	        )
	        return placeholder, ""

	    return cached_table, stamp

	# Column order of the statistics table shown in the UI.
	_STATS_COLUMNS = ['Annotator', 'Total Count', 'Unique Count', 'Unique %']


	def _unique_percentage(unique, total):
	    """Return unique/total as a percentage rounded to one decimal (0 if total <= 0)."""
	    return round((unique / total * 100) if total > 0 else 0, 1)


	def _build_stats_frame(total_counts, unique_counts):
	    """Merge two annotator->count mappings into the display table.

	    Rows are ordered by 'Total Count' descending and followed by a single
	    'TOTAL' summary row.
	    """
	    rows = []
	    total_sum = 0
	    unique_sum = 0

	    # Union of keys: an annotator may appear in only one of the mappings.
	    for annotator in set(total_counts) | set(unique_counts):
	        total = total_counts.get(annotator, 0)
	        unique = unique_counts.get(annotator, 0)
	        total_sum += total
	        unique_sum += unique
	        rows.append({
	            'Annotator': annotator,
	            'Total Count': total,
	            'Unique Count': unique,
	            'Unique %': f"{_unique_percentage(unique, total)}%",
	        })

	    # Sort before appending the summary so the summary row is identified by
	    # position, not by name; an annotator literally called 'TOTAL' is then
	    # still sorted with the other annotators.
	    rows.sort(key=lambda row: row['Total Count'], reverse=True)
	    rows.append({
	        'Annotator': 'TOTAL',
	        'Total Count': total_sum,
	        'Unique Count': unique_sum,
	        'Unique %': f"{_unique_percentage(unique_sum, total_sum)}%",
	    })

	    return pd.DataFrame(rows, columns=_STATS_COLUMNS)


	def update_stats():
	    """Get the latest battle statistics with both total and deduplicated counts.

	    Creates a RemoteLogReader, asks count_files_per_annotator and
	    count_deduplicated_files_per_annotator for their per-annotator counts,
	    builds the combined table, and saves it via save_stats.

	    Returns:
	        (DataFrame, str): the table and a "%Y-%m-%d %H:%M:%S" timestamp.
	        If anything raises, the error is printed and an empty table with
	        the expected columns plus an empty string are returned instead.
	    """
	    try:
	        reader = RemoteLogReader()

	        total_counts = count_files_per_annotator(reader)
	        unique_counts = count_deduplicated_files_per_annotator(reader)

	        df = _build_stats_frame(total_counts, unique_counts)

	        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

	        # Persist so a later start can show these numbers immediately.
	        save_stats(df, current_time)

	        return df, current_time
	    except Exception as e:
	        # Best-effort boundary: report the problem and signal failure through
	        # the empty table / empty timestamp rather than raising.
	        print(f"Error updating stats: {e}")
	        return pd.DataFrame(columns=_STATS_COLUMNS), ""

	def auto_update(state):
	    """Background task to update stats every hour.

	    Loops while ``state['running']`` is truthy: fetches fresh stats with
	    update_stats(), stores them in ``state['stats']`` and
	    ``state['last_update']``, then sleeps for one hour.

	    update_stats() returns an empty timestamp only on failure. In that case
	    the previously stored values are kept, so a transient error does not
	    blank out stats that are already displayed.
	    """
	    while state['running']:
	        stats, last_update = update_stats()
	        # Store the result on success, or when nothing has been stored yet
	        # (so the keys always exist after the first pass).
	        if last_update or 'stats' not in state:
	            state['stats'], state['last_update'] = stats, last_update
	        time.sleep(3600)  # Sleep for 1 hour

	def create_ui():
	    """Build the Gradio app that shows the per-annotator battle statistics.

	    Seeds the shared state from the files read by load_stats(), falling back
	    to update_stats() when nothing is cached. Starts a daemon thread running
	    auto_update() on that state, then wires up the page: a read-only
	    "Last Updated" textbox, the statistics table, and a "Refresh Now" button.

	    Returns:
	        The gr.Blocks application (not yet launched).
	    """
	    state = {'running': True}

	    # Try to load existing stats first
	    state['stats'], state['last_update'] = load_stats()

	    # If no existing stats or they're empty, update them
	    if state['stats'].empty:
	        state['stats'], state['last_update'] = update_stats()

	    # Daemon thread: it must not keep the process alive on shutdown.
	    update_thread = threading.Thread(target=auto_update, args=(state,))
	    update_thread.daemon = True
	    update_thread.start()

	    def get_current_stats():
	        return state['stats']

	    def get_last_update():
	        return state['last_update']

	    def manual_refresh():
	        stats, refreshed_at = update_stats()
	        # update_stats() returns an empty timestamp only on failure; keep the
	        # last good values then, so the table is not blanked.
	        if refreshed_at:
	            state['stats'], state['last_update'] = stats, refreshed_at
	        return state['stats'], state['last_update']

	    with gr.Blocks(title="Battle Count Statistics") as app:
	        gr.Markdown("# Battle Count Statistics")
	        gr.Markdown("Displays the count of battles per annotator. 'Total Count' shows all valid battles, while 'Unique Count' shows deduplicated battles based on the first user prompt. Only conversations that pass the vote conditions are counted, and example prompts are excluded from the unique count.")

	        with gr.Row():
	            # Passing the getter (not its result) as the value, so the
	            # component is not bound to a snapshot taken at build time.
	            last_update = gr.Textbox(
	                value=get_last_update,
	                label="Last Updated",
	                interactive=False
	            )

	        with gr.Row():
	            output = gr.DataFrame(
	                value=get_current_stats,
	                interactive=False,
	                wrap=True,
	                column_widths=["40%", "20%", "20%", "20%"],
	                height=600
	            )

	        refresh_btn = gr.Button("Refresh Now")
	        refresh_btn.click(fn=manual_refresh, outputs=[output, last_update])

	    return app

	if __name__ == "__main__":
	    # Script entry point: build the dashboard, then start serving it.
	    app = create_ui()
	    app.launch()