File size: 981 Bytes
59c601c
 
2431d8f
59c601c
252e08d
 
 
59c601c
252e08d
59c601c
252e08d
 
59c601c
252e08d
2431d8f
 
252e08d
 
2431d8f
252e08d
 
 
 
 
 
0d4fa66
 
 
86f38db
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import streamlit as st
from transformers import pipeline
from PIL import Image

# Load pipelines
#
# The pipelines are built inside cached loaders so that Streamlit script
# reruns reuse the already constructed objects instead of building both
# models again each time the script executes.
@st.cache_resource
def _load_image_to_text():
    """Return the image-captioning pipeline, constructing it only once."""
    return pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


@st.cache_resource
def _load_text_to_speech():
    """Return the text-to-speech pipeline, constructing it only once."""
    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


image_to_text = _load_image_to_text()
text_to_speech = _load_text_to_speech()

st.title("Image-to-Text and Text-to-Speech App")

# Image uploader
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

# The uploader yields None until the user has picked a file.
if uploaded_image is not None:
    image = Image.open(uploaded_image)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Convert image to text: take the text of the first generated result.
    text_output = image_to_text(image)[0]['generated_text']
    st.write("### Extracted Text:")
    st.write(text_output)

    # Convert text to speech, but only when there is something to speak;
    # an empty caption is reported instead of being sent to the TTS model.
    if text_output.strip():
        speech_output = text_to_speech(text_output)
        st.write("### Listen to Speech Output:")
        # The pipeline result carries the waveform under 'audio' and its
        # rate under 'sampling_rate'; both are forwarded to the player.
        st.audio(
            speech_output['audio'],
            format="audio/wav",
            start_time=0,
            sample_rate=speech_output['sampling_rate'],
        )
    else:
        st.warning("No text was generated for this image, so there is nothing to read aloud.")