Fluospark128 committed on
Commit
691ba89
·
verified ·
1 Parent(s): 8e95261

Create app.py

Browse files

Extract the text from the uploaded PDF file and return the genre labels, using the pretrained model and its tokenizer.

Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: extract the text of an uploaded PDF and score it per genre.

Dependencies must be installed outside this script (for example through a
``requirements.txt``), not with a ``pip install`` line in the source:

    pip install streamlit transformers torch PyPDF2
"""

import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Placeholder carried over from the original script; replace it with the real
# Hugging Face Hub repo id or a local checkpoint directory.
MODEL_PATH = "your_huggingface_model_path"

# Define genre labels
# NOTE(review): the order is assumed to match the model's output indices --
# confirm against the checkpoint's config (id2label).
genre_labels = [
    "mystery", "sci-fi", "fantasy", "romance", "thriller", "horror", "drama",
    "comedy", "historical fiction", "adventure", "action", "young adult",
    "classic", "biography", "non-fiction", "self-help",
    "children's literature", "poetry", "crime", "dystopian",
]


@st.cache_resource
def _load_tokenizer_and_model(model_path):
    """Load the tokenizer and classifier once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    caching keeps the weights from being reloaded on each rerun.

    Args:
        model_path: Hub repo id or local directory passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return loaded_tokenizer, loaded_model


def _extract_pdf_text(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Args:
        pdf_file: A path or binary file-like object accepted by ``PdfReader``.

    Returns:
        The page texts joined without a separator; pages that yield no text
        contribute an empty string.
    """
    reader = PdfReader(pdf_file)
    # `or ""` guards against pages for which extract_text() gives back nothing.
    return "".join(page.extract_text() or "" for page in reader.pages)


# Load the model and tokenizer
tokenizer, model = _load_tokenizer_and_model(MODEL_PATH)

st.title("Book Genre Classifier")

# Streamlit app
st.subheader("PDF Text Extractor")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

# Defined up front so the "Classify" handler never hits an unbound name when
# the button is pressed before a file has been uploaded.
all_text = ""

if uploaded_file:
    # Extract text from the uploaded PDF
    all_text = _extract_pdf_text(uploaded_file)

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("PDF Content", all_text, height=300)

if st.button("Classify"):
    if not all_text.strip():
        st.warning("Upload a PDF with extractable text before classifying.")
    else:
        with st.spinner("Classifying..."):
            inputs = tokenizer(all_text, return_tensors="pt", truncation=True, padding=True)
            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                outputs = model(**inputs)
            scores = torch.softmax(outputs.logits, dim=1).numpy()

        # Display results
        st.subheader("Predicted Genres:")
        # zip() stops at the shorter sequence, so a model whose output count
        # differs from len(genre_labels) cannot raise IndexError here.
        for label, score in zip(genre_labels, scores[0]):
            st.write(f"{label}: {score:.2f}")