SA / app.py
isom5240's picture
Update app.py
3a6cb45 verified
raw
history blame
959 Bytes
import streamlit as st
from transformers import pipeline
from io import BytesIO
from PIL import Image
import torch
# Load pipelines
@st.cache_resource
def _load_pipelines():
    """Build the captioning and text-to-speech pipelines.

    Streamlit re-executes this script from the top on every user
    interaction. Caching the loader with ``st.cache_resource`` keeps the
    model objects alive across reruns so they are not re-instantiated each
    time a file is uploaded.

    Returns:
        A ``(image_to_text, text_to_speech)`` tuple of pipeline objects.
    """
    captioner = pipeline(
        "image-to-text",
        model="nlpconnect/vit-gpt2-image-captioning",
    )
    speaker = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    return captioner, speaker


# Keep the original module-level names so the rest of the script is unchanged.
image_to_text, text_to_speech = _load_pipelines()
st.title("Image-to-Text and Text-to-Speech App")

# Image uploader
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

# Everything below only runs once the user has supplied a file.
if uploaded_image is not None:
    image = Image.open(uploaded_image)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Convert image to text: the pipeline returns a list of results; the
    # caption of the first one is shown.
    text_output = image_to_text(image)[0]["generated_text"]
    st.write("### Extracted Text:")
    st.write(text_output)

    # Convert text to speech. The result is a dict carrying the waveform
    # under "audio" and its rate under "sampling_rate".
    speech_output = text_to_speech(text_output)

    st.write("### Listen to Speech Output:")
    # Hand the raw waveform and its sampling rate straight to st.audio.
    # Wrapping the array's bytes in BytesIO and labelling them "audio/wav"
    # yields headerless sample data that the browser cannot decode.
    # NOTE(review): assumes "audio" is a NumPy array that is 1-D or shaped
    # (channels, samples), as st.audio expects - confirm for this model.
    st.audio(
        speech_output["audio"],
        sample_rate=speech_output["sampling_rate"],
    )