Spaces:
Runtime error
Runtime error
demo
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ import spaces
|
|
| 3 |
import gradio as gr
|
| 4 |
import librosa
|
| 5 |
import numpy as np
|
| 6 |
-
from speechbrain.inference import EncoderClassifier
|
| 7 |
import torch
|
| 8 |
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
|
| 9 |
|
|
@@ -18,23 +17,6 @@ speaker_embeddings = {
|
|
| 18 |
"BDP": "spkemb/speaker2.npy",
|
| 19 |
}
|
| 20 |
|
| 21 |
-
spk_model_name = "speechbrain/spkrec-xvect-voxceleb"
|
| 22 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 23 |
-
speaker_model = EncoderClassifier.from_hparams(
|
| 24 |
-
source=spk_model_name,
|
| 25 |
-
run_opts={"device": device},
|
| 26 |
-
savedir=os.path.join("/tmp", spk_model_name),
|
| 27 |
-
)
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def create_speaker_embedding(waveform):
|
| 31 |
-
with torch.no_grad():
|
| 32 |
-
se = speaker_model.encode_batch(torch.tensor(waveform))
|
| 33 |
-
se = torch.nn.functional.normalize(se, dim=2)
|
| 34 |
-
se = se.squeeze().cpu().numpy()
|
| 35 |
-
return se
|
| 36 |
-
|
| 37 |
-
|
| 38 |
@spaces.GPU
|
| 39 |
def predict(text, speaker, audio):
|
| 40 |
if len(text.strip()) == 0:
|
|
@@ -67,7 +49,6 @@ gr.Interface(
|
|
| 67 |
fn=predict,
|
| 68 |
inputs=[
|
| 69 |
gr.Text(label="Input Text"),
|
| 70 |
-
gr.Audio(sources="microphone", type="filepath"),
|
| 71 |
gr.Radio(label="Speaker", choices=[
|
| 72 |
"GGP (gwryw-gogledd-pro)",
|
| 73 |
"BGP (benyw-gogledd-pro)",
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
import librosa
|
| 5 |
import numpy as np
|
|
|
|
| 6 |
import torch
|
| 7 |
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
|
| 8 |
|
|
|
|
| 17 |
"BDP": "spkemb/speaker2.npy",
|
| 18 |
}
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
@spaces.GPU
|
| 21 |
def predict(text, speaker, audio):
|
| 22 |
if len(text.strip()) == 0:
|
|
|
|
| 49 |
fn=predict,
|
| 50 |
inputs=[
|
| 51 |
gr.Text(label="Input Text"),
|
|
|
|
| 52 |
gr.Radio(label="Speaker", choices=[
|
| 53 |
"GGP (gwryw-gogledd-pro)",
|
| 54 |
"BGP (benyw-gogledd-pro)",
|