"""Streamlit app that predicts the genres of an uploaded PDF book.

The app extracts the text of the uploaded PDF with PyPDF2 and scores it
against a fixed list of candidate genres using a Hugging Face zero-shot
classification pipeline.
"""

import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline

# Name of the Hugging Face model used for zero-shot classification.
CLASSIFIER_MODEL = "facebook/bart-large-mnli"

# Only this many leading characters of the extracted text are classified.
MAX_CLASSIFIED_CHARS = 3000

# Number of highest-scoring genres shown to the user.
TOP_GENRE_COUNT = 20

# Candidate genres scored by the classifier.
CANDIDATE_LABELS = [
    # Technical and scientific writing
    "Scientific Papers",
    "Technical Documentation",
    "Research Reports",
    "Academic Journals",
    "White Papers",
    "Technical Manuals",
    "Patents",
    "Software Documentation",
    "Engineering Specifications",
    "Computer Science Literature",
    "Machine Learning Publications",
    "Data Science Reports",
    "Network Architecture Descriptions",
    "Cybersecurity Analysis",
    "Algorithm Descriptions",
    # Fiction
    "Fantasy",
    "Science Fiction",
    "Mystery",
    "Thriller",
    "Romance",
    "Historical Fiction",
    "Horror",
    "Adventure",
    "Crime",
    "Western",
    "Dystopian",
    "Magical Realism",
    "Young Adult",
    "Children's Literature",
    "Gothic",
    # Non-fiction
    "Biography",
    "Autobiography",
    "Memoir",
    "Travel Writing",
    "History",
    "Philosophy",
    "Psychology",
    "Self-Help",
    "Political Commentary",
    "True Crime",
    "Nature Writing",
    "Cultural Studies",
    "Sociology",
    "Anthropology",
    "Religious Studies",
    # Literary forms
    "Poetry",
    "Drama",
    "Epic",
    "Short Story",
    "Novel",
    "Novella",
    "Satire",
    "Tragedy",
    "Comedy",
    "Tragicomedy",
    # Journalism
    "News Reporting",
    "Feature Writing",
    "Opinion Pieces",
    "Investigative Journalism",
    "Editorial",
    "Profile Writing",
    "Sports Writing",
    "Political Journalism",
    # Academic work
    "Dissertation",
    "Thesis",
    "Critical Analysis",
    "Comparative Study",
    "Literature Review",
    "Meta-Analysis",
    # Was " Case Study": the stray leading space was part of the label
    # passed to the model and shown to the user.
    "Case Study",
]


@st.cache_resource
def load_classifier():
    """Return the zero-shot classification pipeline.

    Decorated with ``st.cache_resource`` so the model is built once and
    reused across script reruns instead of being reloaded for every
    uploaded file.
    """
    return pipeline("zero-shot-classification", model=CLASSIFIER_MODEL)


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in ``pdf_file``.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts; the app passes the
            object returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order. Pages for which
        ``extract_text()`` yields ``None`` or an empty string contribute
        nothing, so the result is ``""`` when no text could be extracted.
    """
    reader = PdfReader(pdf_file)
    # ``or ""`` guards against a page whose extraction yields None, which
    # would otherwise raise TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)


def main():
    """Render the upload UI and show the top-scoring genres for the PDF."""
    st.title("BOOK GENRE PREDICTION APP")
    st.write("Upload a book(pdf format), and this app will predict the genres in the book.")

    pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
    if pdf_file is None:
        # Nothing uploaded yet; Streamlit reruns the script on upload.
        return

    st.write("Processing the PDF...")
    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        st.error("No text could be extracted from the PDF. Please try another file.")
        return

    st.write("PDF Text Extracted. Predicting the Genres...")
    classifier = load_classifier()

    # multi_label=True is passed so several genres can score highly at once.
    result = classifier(
        text[:MAX_CLASSIFIED_CHARS],
        CANDIDATE_LABELS,
        multi_label=True,
    )
    ranked = sorted(
        zip(result["labels"], result["scores"]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    st.subheader(f"Top {TOP_GENRE_COUNT} Detected Genres:")
    for genre, score in ranked[:TOP_GENRE_COUNT]:
        # Labels are shown as defined; str.capitalize() would lowercase
        # every character after the first ("Science Fiction" ->
        # "Science fiction").
        st.write(f"**{genre}**: {score:.2f}")


if __name__ == "__main__":
    main()