Create app.py
Extracts the text from the uploaded PDF file and returns genre labels using the pretrained model.
app.py
ADDED
@@ -0,0 +1,53 @@
# Dependencies (streamlit, transformers, torch, PyPDF2) are installed via requirements.txt on the Space,
# not via a "pip install" line inside app.py.

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from PyPDF2 import PdfReader

# Load the fine-tuned model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("your_huggingface_model_path")
model = AutoModelForSequenceClassification.from_pretrained("your_huggingface_model_path")

# Define genre labels (order must match the model's output classes)
genre_labels = ["mystery", "sci-fi", "fantasy", "romance", "thriller", "horror", "drama", "comedy",
                "historical fiction", "adventure", "action", "young adult", "classic", "biography",
                "non-fiction", "self-help", "children's literature", "poetry", "crime", "dystopian"]

st.title("Book Genre Classifier")
st.subheader("PDF Text Extractor")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file:
    # Extract text from the uploaded PDF
    reader = PdfReader(uploaded_file)
    all_text = ""
    for page in reader.pages:
        # extract_text() may return None for pages without a text layer
        all_text += page.extract_text() or ""

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("PDF Content", all_text, height=300)

    if st.button("Classify"):
        with st.spinner("Classifying..."):
            inputs = tokenizer(all_text, return_tensors="pt", truncation=True, padding=True)
            outputs = model(**inputs)
            scores = torch.softmax(outputs.logits, dim=1).detach().numpy()

        # Display results
        st.subheader("Predicted Genres:")
        for i, label in enumerate(genre_labels):
            st.write(f"{label}: {scores[0][i]:.2f}")
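The original file began with "pip install PyPDF2", which is a shell command and does not run as Python. On a Streamlit Space, dependencies are declared in a requirements.txt file next to app.py. A minimal sketch, with the package list inferred from the imports above and versions left unpinned as an assumption:

# requirements.txt (sketch; pin versions as needed)
streamlit
transformers
torch
PyPDF2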
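The hard-coded genre_labels list only lines up with the softmax scores if the fine-tuned checkpoint was trained with its classes in exactly that order. If the checkpoint stores label names in its config (as transformers sequence-classification configs usually do), one alternative sketch is to read them from model.config.id2label instead; "your_huggingface_model_path" remains the placeholder from the code above:

# Hypothetical sketch: derive the label list from the checkpoint instead of hard-coding it
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("your_huggingface_model_path")
# config.id2label maps output index -> label name, e.g. {0: "mystery", 1: "sci-fi", ...}
genre_labels = [model.config.id2label[i] for i in range(model.config.num_labels)]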