pground / app.py
Maxxie88's picture
Create app.py
536424d
raw
history blame
690 Bytes
import gradio as gr
import numpy as np
import torch
from huggingface_hub import snapshot_download
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
# Hub repository that provides both the processor files and the CTC weights.
model_id = "wbbbbb/wav2vec2-large-chinese-zh-cn"

# snapshot_download returns the path of the local snapshot directory, not a
# model object, so keep it under its own name and load from it explicitly.
model_dir = snapshot_download(repo_id=model_id, cache_dir='cache')

# Load both pieces from the same local snapshot so the files fetched above
# are the ones actually used.
processor = Wav2Vec2Processor.from_pretrained(model_dir)
model = Wav2Vec2ForCTC.from_pretrained(model_dir)
def transcribe(audio):
    """Speech recognition entry point: turn one audio clip into text.

    Args:
        audio: Either the ``(sample_rate, samples)`` tuple delivered by
            Gradio's default numpy audio input, or a bare waveform that is
            already sampled at 16 kHz. ``None`` (nothing recorded or
            uploaded) is accepted.

    Returns:
        The decoded transcription of the clip, or ``""`` when there is no
        audio to transcribe.
    """
    target_rate = 16_000  # sampling rate declared to the processor below

    if audio is None:
        return ""

    if isinstance(audio, tuple) and len(audio) == 2:
        source_rate, samples = audio
    else:
        # Bare waveform: keep the original contract (already at 16 kHz).
        source_rate, samples = target_rate, audio

    samples = np.asarray(samples)
    if samples.size == 0:
        return ""

    # Integer PCM is rescaled to roughly [-1, 1]; float input is kept as is.
    if np.issubdtype(samples.dtype, np.integer):
        waveform = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        waveform = samples.astype(np.float32)

    # Downmix multi-channel audio to mono.
    # Assumes a (samples, channels) layout, as Gradio delivers it.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=-1)

    if source_rate != target_rate:
        # NOTE: plain linear interpolation with no anti-aliasing filter; it
        # avoids a new dependency but a dedicated resampler would be cleaner.
        new_len = max(1, round(waveform.shape[0] * target_rate / source_rate))
        positions = np.linspace(0, waveform.shape[0] - 1, num=new_len)
        waveform = np.interp(
            positions, np.arange(waveform.shape[0]), waveform
        ).astype(np.float32)

    inputs = processor(
        waveform,
        sampling_rate=target_rate,
        return_tensors="pt",
        padding=True,
    )
    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    # Greedy CTC decoding: take the highest-scoring token at every frame.
    prediction = processor.batch_decode(torch.argmax(logits, dim=-1))
    return prediction[0]
# Wrap transcribe() in a minimal web UI (audio in, text out) and launch it.
iface = gr.Interface(
    fn=transcribe,
    inputs="audio",
    outputs="text",
)
iface.launch()