Yassmen committed on
Commit
e3521d3
·
verified ·
1 Parent(s): a3c7a33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -101
app.py CHANGED
@@ -1,101 +1,103 @@
1
- import torch
2
- import re
3
- import gradio as gr
4
- import soundfile as sf
5
- import numpy as np
6
- from transformers import SpeechT5HifiGan
7
- from IPython.display import Audio
8
- from transformers import SpeechT5ForTextToSpeech
9
- from transformers import SpeechT5Processor
10
-
11
- # helper function
12
- number_words = {
13
- 0: "zero", 1: "one", 2: "two", 3: "three", 4: "four", 5: "five", 6: "six", 7: "seven", 8: "eight", 9: "nine",
14
- 10: "ten", 11: "eleven", 12: "twelve", 13: "thirteen", 14: "fourteen", 15: "fifteen", 16: "sixteen", 17: "seventeen",
15
- 18: "eighteen", 19: "nineteen", 20: "twenty", 30: "thirty", 40: "forty", 50: "fifty", 60: "sixty", 70: "seventy",
16
- 80: "eighty", 90: "ninety", 100: "hundred", 1000: "thousand"
17
- }
18
- replacements = [
19
- ("“", '"'),
20
- ("”", '"'),
21
- ("’", ","),
22
- ("_", " "),
23
- ("\xa0", " "),
24
- ("\n", " "),
25
- ("$","dollar"),
26
- ("%","percent"),
27
- ("&","and"),
28
- ("*","star"),
29
- ("+","plus"),
30
- ("—","-")
31
-
32
- ]
33
- def number_to_words(number):
34
- if number < 20:
35
- return number_words[number]
36
- elif number < 100:
37
- tens, unit = divmod(number, 10)
38
- return number_words[tens * 10] + (" " + number_words[unit] if unit else "")
39
- elif number < 1000:
40
- hundreds, remainder = divmod(number, 100)
41
- return (number_words[hundreds] + " hundred" if hundreds > 1 else "hundred") + (" " + number_to_words(remainder) if remainder else "")
42
- elif number < 1000000:
43
- thousands, remainder = divmod(number, 1000)
44
- return (number_to_words(thousands) + " thousand" if thousands > 1 else "thousand") + (" " + number_to_words(remainder) if remainder else "")
45
- elif number < 1000000000:
46
- millions, remainder = divmod(number, 1000000)
47
- return number_to_words(millions) + " million" + (" " + number_to_words(remainder) if remainder else "")
48
- elif number < 1000000000000:
49
- billions, remainder = divmod(number, 1000000000)
50
- return number_to_words(billions) + " billion" + (" " + number_to_words(remainder) if remainder else "")
51
- else:
52
- return str(number)
53
-
54
- def replace_numbers_with_words(text):
55
-
56
- def replace(match):
57
- number = int(match.group())
58
- return number_to_words(number)
59
-
60
- # Find the numbers and change with words.
61
- result = re.sub(r'\b\d+\b', replace, text)
62
-
63
- return result
64
-
65
- def cleanup_text(text):
66
- for src, dst in replacements:
67
- text = text.replace(src, dst)
68
- return text
69
-
70
-
71
- model = SpeechT5ForTextToSpeech.from_pretrained(
72
- "Yassmen/speecht5_finetuned_english_tehnical"
73
- )
74
-
75
- checkpoint = "microsoft/speecht5_tts"
76
- processor = SpeechT5Processor.from_pretrained(checkpoint)
77
-
78
-
79
-
80
- def generate_wav_file(text):
81
-
82
- converted_text = replace_numbers_with_words(text)
83
- cleaned_text = cleanup_text(converted_text)
84
- final_text = normalize_text(cleaned_text)
85
- inputs = processor(text=final_text, return_tensors="pt")
86
- speaker_embeddings = torch.tensor(np.load('speaker_embedding.npy'))
87
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
88
- speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
89
-
90
-
91
- return Audio(speech.numpy(), rate=16000)
92
-
93
- iface = gr.Interface(
94
- fn=generate_wav_file,
95
- inputs=gr.Textbox(lines=3, label="Enter text to convert to speech"),
96
- outputs="audio",
97
- title="Text-to-Speech Technical EN"
98
- )
99
- if __name__ == "__main__":
100
- iface.launch()
101
-
 
 
 
1
+ import torch
2
+ import re
3
+ import gradio as gr
4
+ import soundfile as sf
5
+ import numpy as np
6
+ from transformers import SpeechT5HifiGan
7
+ from IPython.display import Audio
8
+ from transformers import SpeechT5ForTextToSpeech
9
+ from transformers import SpeechT5Processor
10
+
11
+ # helper function
12
# Spelled-out English words keyed by integer value: 0-19, the multiples of
# ten up to 90, plus the scale entries for 100 and 1000.
number_words = {
    0: "zero", 1: "one", 2: "two", 3: "three", 4: "four",
    5: "five", 6: "six", 7: "seven", 8: "eight", 9: "nine",
    10: "ten", 11: "eleven", 12: "twelve", 13: "thirteen", 14: "fourteen",
    15: "fifteen", 16: "sixteen", 17: "seventeen", 18: "eighteen",
    19: "nineteen",
    20: "twenty", 30: "thirty", 40: "forty", 50: "fifty", 60: "sixty",
    70: "seventy", 80: "eighty", 90: "ninety",
    100: "hundred", 1000: "thousand",
}

# (source, replacement) pairs applied in list order by `cleanup_text`.
replacements = [
    # Typographic quotes -> plain ASCII equivalents.
    ("“", '"'),
    ("”", '"'),
    # A curly apostrophe is an apostrophe, not a comma; mapping it to ","
    # turned "don’t" into "don,t".
    ("’", "'"),
    # Separators and invisible whitespace -> a plain space.
    ("_", " "),
    ("\xa0", " "),
    ("\n", " "),
    # Symbols are spelled out. The surrounding spaces keep the word from
    # being glued to its neighbours ("50%" -> "50 percent ", not "50percent").
    ("$", " dollar "),
    ("%", " percent "),
    ("&", " and "),
    ("*", " star "),
    ("+", " plus "),
    ("—", "-"),
]
33
def number_to_words(number):
    """Spell out an integer in English words.

    Args:
        number: The integer to convert.

    Returns:
        The spelled-out form, e.g. ``42 -> "forty two"``,
        ``100 -> "one hundred"``, ``1005 -> "one thousand five"``.
        Negative values are prefixed with ``"minus"``. Values of one
        trillion or more are returned as their decimal string unchanged.
    """
    if number < 0:
        # The lookup table has no negative keys; spell the magnitude instead
        # of failing with a KeyError.
        return "minus " + number_to_words(-number)

    if number >= 1000000000000:
        # No scale word beyond "billion" is supported.
        return str(number)

    if number < 20:
        return number_words[number]

    if number < 100:
        tens, unit = divmod(number, 10)
        words = number_words[tens * 10]
        if unit:
            words += " " + number_words[unit]
        return words

    # Largest scale first. The leading count is always spelled out, so 100 is
    # "one hundred" and 1000 is "one thousand", matching how the million and
    # billion ranges read.
    scales = (
        (1000000000, "billion"),
        (1000000, "million"),
        (1000, "thousand"),
        (100, "hundred"),
    )
    for scale_value, scale_name in scales:
        if number >= scale_value:
            count, remainder = divmod(number, scale_value)
            words = number_to_words(count) + " " + scale_name
            if remainder:
                words += " " + number_to_words(remainder)
            return words
53
+
54
# Matches either a comma-grouped integer ("1,000", "12,345,678") or a plain
# run of digits. The grouped form is tried first so the separators are
# consumed together with the digits. Compiled once at import time.
_NUMBER_PATTERN = re.compile(r"\b\d{1,3}(?:,\d{3})+\b|\b\d+\b")


def replace_numbers_with_words(text):
    """Replace every standalone integer in ``text`` with its spelled-out form.

    Thousands separators are understood, so ``"1,000"`` is converted as the
    single number 1000 rather than as ``1`` and ``000`` on either side of a
    comma. Digits embedded in a word (e.g. ``"abc123"``) are left untouched
    because of the word-boundary anchors.

    Args:
        text: The input string.

    Returns:
        ``text`` with each matched number replaced by the result of
        ``number_to_words``.
    """

    def replace(match):
        # Drop the grouping commas before parsing the digits.
        digits = match.group().replace(",", "")
        return number_to_words(int(digits))

    return _NUMBER_PATTERN.sub(replace, text)
64
+
65
def cleanup_text(text):
    """Apply every pair in ``replacements`` to ``text``, in list order.

    Args:
        text: The string to clean.

    Returns:
        The string after each source substring has been replaced by its
        counterpart.
    """
    cleaned = text
    for pair in replacements:
        old, new = pair
        cleaned = cleaned.replace(old, new)
    return cleaned
69
+
70
+
71
# Text-to-speech model, loaded once at import time.
# NOTE(review): the repo id spelling ("tehnical") presumably matches the
# published Hub repository name -- confirm before "correcting" it.
model = SpeechT5ForTextToSpeech.from_pretrained("Yassmen/speecht5_finetuned_english_tehnical")

# The processor (text -> model inputs) comes from a separate checkpoint id.
checkpoint = "microsoft/speecht5_tts"
processor = SpeechT5Processor.from_pretrained(checkpoint)
77
+
78
+
79
+
80
# Sample rate reported to Gradio together with the generated waveform.
_SAMPLE_RATE = 16000

# Filled on first use so the speaker embedding and vocoder are loaded once
# rather than on every request.
_synthesis_cache = {}


def _normalize_text(text):
    """Collapse runs of whitespace into single spaces and trim both ends."""
    return " ".join(text.split())


def _synthesis_resources():
    """Return ``(speaker_embeddings, vocoder)``, loading each on first use."""
    # Each entry is checked separately so a failure while loading one does
    # not leave the cache in a half-filled state that later lookups trip on.
    if "speaker_embeddings" not in _synthesis_cache:
        # NOTE(review): assumes the saved array already has the shape
        # `generate_speech` expects for speaker embeddings -- confirm.
        _synthesis_cache["speaker_embeddings"] = torch.tensor(
            np.load("speaker_embedding.npy")
        )
    if "vocoder" not in _synthesis_cache:
        _synthesis_cache["vocoder"] = SpeechT5HifiGan.from_pretrained(
            "microsoft/speecht5_hifigan"
        )
    return _synthesis_cache["speaker_embeddings"], _synthesis_cache["vocoder"]


def generate_wav_file(text):
    """Synthesize speech for ``text`` and return it in Gradio's audio format.

    The text has its numbers spelled out, its symbols replaced, and its
    whitespace normalized before being passed to the processor and model.

    The previous version called ``normalize_text``, which is not defined or
    imported anywhere in this file, so every request failed with a
    ``NameError``. It also returned an ``IPython.display.Audio`` object,
    which is not a value the ``"audio"`` output of ``gr.Interface`` accepts.

    Args:
        text: The text entered by the user.

    Returns:
        A ``(sample_rate, waveform)`` tuple where ``waveform`` is a NumPy
        array, or ``None`` when ``text`` is empty or only whitespace.

    Raises:
        gr.Error: If synthesis fails, so the reason is shown in the UI
            instead of an empty output.
    """
    if not text or not text.strip():
        # Nothing to say: leave the audio output empty.
        return None

    try:
        converted_text = replace_numbers_with_words(text)
        cleaned_text = cleanup_text(converted_text)
        final_text = _normalize_text(cleaned_text)
        inputs = processor(text=final_text, return_tensors="pt")
        speaker_embeddings, vocoder = _synthesis_resources()
        speech = model.generate_speech(
            inputs["input_ids"], speaker_embeddings, vocoder=vocoder
        )
    except Exception as e:
        # Keep the server-side log line, but also report the failure to the
        # user rather than silently returning no audio.
        print(f"Error: {e}")
        raise gr.Error(f"Speech generation failed: {e}") from e

    return _SAMPLE_RATE, speech.numpy()
93
+
94
+
95
# Gradio UI: one multi-line text box in, one audio component out.
prompt_box = gr.Textbox(lines=3, label="Enter text to convert to speech")
iface = gr.Interface(
    fn=generate_wav_file,
    inputs=prompt_box,
    outputs="audio",
    title="Text-to-Speech Technical EN",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
103
+