Spaces:
Sleeping
Sleeping
| # Work in Progress by Marco Barnig | |
| # credits | |
| # https://piper.ttstool.com/ | |
| # https://huggingface.co/spaces/broadfield/piper-fast-tts | |
| # https://github.com/rhasspy | |
| # https://github.com/rhasspy/piper | |
| # https://github.com/broadfield-dev/PyPiperTTS-win | |
| # https://github.com/broadfield-dev/PyPiperTTS | |
| import gradio as gr | |
| import subprocess | |
| import os | |
| import json | |
| import uuid | |
| import requests | |
| from pypipertts import PyPiper | |
# Single PyPiper instance shared by the model loader and the TTS button handlers.
pp=PyPiper()
# Sample text used as the initial value of the "Text" input box.
my_examples = """
An der Zäit hunn sech den Nordwand an d’Sonn gestridden, wie vun hinnen zwee wuel méi staark wier,
wéi e Wanderer, deen an ee waarme Mantel agepak war, iwwert de Wee koum.
Si goufen sech eens, datt deejéinege fir dee Stäerkste gëlle sollt, deen de Wanderer forcéiere géif, säi Mantel auszedoen.
Den Nordwand huet mat aller Force geblosen, awer wat e méi geblosen huet, wat de Wanderer sech méi a säi Mantel agewéckelt huet.
Um Enn huet den Nordwand säi Kampf opginn. Dunn huet d’Sonn d’Loft mat hire frëndleche Strale gewiermt,
a schonn no kuerzer Zäit huet de Wanderer säi Mantel ausgedoen. Do huet den Nordwand missen zouginn,
datt d’Sonn vun hinnen zwee dee Stäerkste wier."""
# Labels offered by the "Liestempo" radio group; change_speed() maps them to numbers.
speeds = ["ganz lues", "lues", "normal", "schnell", "ganz schnell"]

# Labels offered by the "Sazpaus" radio group; change_stop() maps them to numbers.
stops = ["ganz kuerz", "kuerz", "mëttel", "laang", "ganz laang"]
def change_speed(choice):
    """Translate a reading-speed label into the number sent to the "Length" slider.

    Args:
        choice: Label selected in the speed radio group.

    Returns:
        1.8, 1.4, 1 or 0.6 for the four labels listed below; 0.2 for
        anything else (which includes "ganz schnell").
    """
    label_to_length = (
        ("ganz lues", 1.8),
        ("lues", 1.4),
        ("normal", 1),
        ("schnell", 0.6),
    )
    # Scan with == rather than a dict lookup so any input type is accepted.
    for label, value in label_to_length:
        if choice == label:
            return value
    return 0.2
def change_stop(choice):
    """Translate a sentence-pause label into the number sent to the "Sentence Pause" slider.

    Args:
        choice: Label selected in the pause radio group.

    Returns:
        0.2, 0.6, 1 or 2.5 for the four labels listed below; 4 for
        anything else (which includes "ganz laang").
    """
    label_to_pause = (
        # Must match the entry in ``stops`` exactly; a misspelt label here
        # would make the shortest choice fall through to the longest pause.
        ("ganz kuerz", 0.2),
        ("kuerz", 0.6),
        ("mëttel", 1),
        ("laang", 2.5),
    )
    for label, value in label_to_pause:
        if choice == label:
            return value
    return 4
def init():
    """Build the update that populates the voice dropdown when the page loads.

    Returns:
        A ``gr.update`` setting the label, the three available voice keys,
        the preselected voice and the interactive flag.
    """
    voice_keys = [
        'lb_LU-marylux-medium',
        'lb_LU-femaleLOD-medium',
        'lb_LU-androgynous-medium',
    ]
    return gr.update(
        label="Voice",
        choices=voice_keys,
        value="lb_LU-femaleLOD-medium",
        interactive=True,
    )
def new_load_mod(instr="en_US-joe-medium"):
    """Ensure the voice ``instr`` is present under ``./voices`` and register its config.

    The voice key is expected to look like ``<lang>_<REGION>-<name>-<style>``.
    If ``voices/<instr>.onnx`` is missing in the current working directory,
    the model and its ``.onnx.json`` config are downloaded first. In every
    case ``pp.json_ob`` is set to the path of the config file.

    Args:
        instr: Voice key, e.g. ``"lb_LU-marylux-medium"``.

    Raises:
        IndexError: If ``instr`` has fewer than three ``-``-separated parts.
        requests.RequestException: If a download fails or returns an HTTP
            error status.
        OSError: If the files cannot be written.
    """
    model=instr
    print(f"model: {model}")
    lang=instr.split("_")[0]
    print(f"lang: {lang}")
    parts=instr.split("-")
    dia=parts[0]
    print(f"dia: {dia}")
    name=parts[1]
    print(f"name: {name}")
    style=parts[2]
    print(f"style: {style}")
    file=f'{instr}.onnx'
    print(f"file: {file}")
    print(f"Loading model: {file}")
    print(f"os.path: {os.path}")
    voices_dir=f'{os.getcwd()}/voices'
    model_path=f'{voices_dir}/{file}'
    config_path=f'{model_path}.json'
    if not os.path.isfile(model_path):
        print(f"Model not found locally")
        m_path= f"https://huggingface.co/mbarnig/lb_rhasspy_piper_tts/resolve/main/{lang}/{dia}/{name}/{style}/{file}"
        print(f"m_path: {m_path}")
        print("Downloading json...")
        json_file=requests.get(f"{m_path}.json",timeout=60)
        # Fail loudly instead of saving an HTTP error page as the config.
        json_file.raise_for_status()
        print("Downloading model...")
        mod_file=requests.get(m_path,timeout=60)
        mod_file.raise_for_status()
        # The target directory may not exist on a fresh checkout.
        os.makedirs(voices_dir,exist_ok=True)
        # Write the config first: the cache check above looks only at the
        # model file, so the model must never exist without its config.
        with open(config_path,'wb') as j:
            j.write(json_file.content)
        with open(model_path,'wb') as m:
            m.write(mod_file.content)
    pp.json_ob=config_path
    print("Model Loaded")
def load_mod(instr="en_US-joe-medium"):
    """Load the voice ``instr`` and return its config file contents.

    Shows a Gradio info toast before and after loading, delegates the
    download/registration to ``new_load_mod`` and then reads the file that
    ``pp.json_ob`` points to.

    Args:
        instr: Voice key selected in the dropdown.

    Returns:
        The raw text of the voice's JSON config file.
    """
    gr.Info(f"""Loading Model...<br>{instr}""",duration=2)
    new_load_mod(instr=instr)
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(pp.json_ob,'r',encoding='utf-8') as f:
        json_ob=f.read()
    gr.Info(f"Model Loaded<br>{instr}",duration=2)
    return json_ob
def save_set(model,length,noise,width,sen_pause):
    """Write the current settings to a JSON file under ``./saved``.

    The file name is built from the five values joined by ``__``, with
    every ``.`` replaced by ``_``.

    Args:
        model: Voice key.
        length: Value of the "Length" slider.
        noise: Value of the "Noise" slider.
        width: Value of the "Noise Width" slider.
        sen_pause: Value of the "Sentence Pause" slider.

    Returns:
        Path of the written JSON file.
    """
    save_dir=f'{os.getcwd()}/saved'
    # exist_ok avoids the check-then-create race between concurrent saves.
    os.makedirs(save_dir,exist_ok=True)
    set_json={"model":model,"length":length,"noise":noise,"width":width,"pause":sen_pause}
    file_name=f'{model}__{length}__{noise}__{width}__{sen_pause}'.replace(".","_")
    out_path=f'{save_dir}/{file_name}.json'
    with open(out_path,'w',encoding='utf-8') as file:
        file.write(json.dumps(set_json,indent=4))
    return out_path
def load_set(set_file):
    """Read a saved settings file and return updates for the five controls.

    Args:
        set_file: Path of a JSON file as written by ``save_set``, or a
            falsy value when no file is selected.

    Returns:
        Five ``gr.update`` objects, in the order model, length, noise,
        width, pause. When ``set_file`` is falsy the updates carry no
        values.

    Raises:
        KeyError: If the file lacks one of the expected keys.
    """
    if not set_file:
        # Nothing to load (e.g. the file widget was cleared).
        return tuple(gr.update() for _ in range(5))
    with open(set_file,'r',encoding='utf-8') as file:
        set_json=json.load(file)
    return(gr.update(value=set_json['model']),gr.update(value=set_json['length']),
           gr.update(value=set_json['noise']),gr.update(value=set_json['width']),
           gr.update(value=set_json['pause']))
| # txt="""PiperTTS is a powerful text-to-speech TTS node designed to convert written text into high-quality spoken audio. This node leverages advanced voice synthesis models to generate natural-sounding speech, making it an invaluable tool for AI developers looking to add a vocal element to their projects.""" | |
def button_on(stream):
    """Choose which of the two action buttons is shown.

    Args:
        stream: State of the "Stream" checkbox.

    Returns:
        A pair of ``gr.update`` objects for the two wired buttons. When
        ``stream`` is truthy the first is visible and interactive and the
        second is hidden; otherwise the roles are swapped. A pair is always
        returned, so both outputs always receive a value.
    """
    shown=gr.update(interactive=True,visible=True)
    hidden=gr.update(interactive=False,visible=False)
    if stream:
        return shown,hidden
    return hidden,shown
def clear_aud():
    """Return ``None``; wired to the audio output so its value is reset."""
    return
# UI definition and event wiring.
# NOTE(review): the source lost its indentation; the nesting below was
# reconstructed from the order of the context managers. Confirm it against
# the deployed layout.
with gr.Blocks() as b:
    gr.HTML("<h1>Rhasspy Piper LU TTS Streaming</h1>")
    with gr.Row():
        # Wider column: text input, voice selector, action buttons, audio output.
        with gr.Column(scale=2):
            in_txt=gr.Textbox(label="Text",lines=10, value=my_examples)
            # Left empty here; filled by init() through b.load below.
            names=gr.Dropdown()
            with gr.Row():
                stream_btn=gr.Button("Stream",interactive=True,visible=True)
                # Starts hidden; button_on() swaps it with stream_btn.
                sub_btn=gr.Button(interactive=False,visible=False)
                cancel_btn=gr.Button("Stop")
            out_aud=gr.Audio(streaming=True, autoplay=True)
        # Narrower column: information text, quick presets, detailed controls.
        with gr.Column(scale=1):
            with gr.Row():
                with gr.Accordion("Informatiounen", open=False):
                    gr.Markdown("""
D'Rhasspy **Piper TTS** Technologie gouf vum Michael Hansen, alias [Synesthesiam](https://github.com/synesthesiam), entwéckelt.
Hien huet och d'Marylux Stëmm trainéiert. Déi zwou aner Lëtzebuergesch Stëmme goufe vum Marco Barnig realiséiert.
D'Piper Technik berout op enger Ëmwandlung vun Text an **eSpeak-Phonemen**, déi zesumme mat den entspriechenden Audio-Dateien an
engem **neuronale KI-Netz** trainéiert ginn. Déi generéiert TTS-Modeller sinn optiméiert fir Streaming a kënnen a Screenliesmaschinnen,
wéi [NVDA](https://www.nvaccess.org/about-nvda/), agesat ginn. Sie kënnen awer och am Ganze synthetiséiert ginn,
woubäi d'Qualitéit da besser gëtt.
Verschidde Parameter wéi Liestempo, Sazpaus a Geräischintensitéiten kënnen mat Radioknäpp oder mat Schieberen (am Control-Tab)
agestallt ginn. D'Parameter kënnen och an enger Datei ofgespäichert a reimportéiert ginn.""")
            with gr.Row():
                # Preset radios; their change handlers below write to the
                # length and sen_pause sliders.
                vitess=gr.Radio(label="Liestempo", choices = speeds, value = "normal")
                pause=gr.Radio(label="Sazpaus", choices = stops, value = "mëttel")
            with gr.Accordion("Control", open=False):
                stream=gr.Checkbox(label="Stream",info="Streaming is fast, but lower quality",value=True,interactive=True)
                length=gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1)
                noise=gr.Slider(label="Noise", minimum=0.01, maximum=3.0, value=0.5, visible=True)
                width=gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5, visible=True)
                sen_pause=gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1, visible=True)
                with gr.Tab("Save Settings"):
                    save_btn=gr.Button("Save")
                    save_file=gr.File()
                with gr.Tab("Load Settings"):
                    load_file=gr.File()
            with gr.Accordion("Model Config", open=False):
                json_ob=gr.JSON(label="JSON")
    # Event wiring. The handles f1..f8 are kept so the Stop button can cancel them.
    # Checkbox toggles which of the two action buttons is shown.
    f1=stream.change(button_on,stream,[stream_btn,sub_btn])
    f2=save_btn.click(save_set,[names,length,noise,width,sen_pause],save_file)
    f3=load_file.change(load_set,load_file,[names,length,noise,width,sen_pause])
    # Selecting a voice loads it, shows its config, then clears the audio output.
    f4=names.change(load_mod,names,json_ob).then(clear_aud,None,out_aud)
    # Each action button has two click listeners: one clears the audio
    # output, the other runs the synthesis.
    f5=stream_btn.click(clear_aud,None,out_aud)
    f6=stream_btn.click(pp.stream_tts,[in_txt,names,length,noise,width,sen_pause],out_aud)
    f7=sub_btn.click(clear_aud,None,out_aud)
    f8=sub_btn.click(pp.tts,[in_txt,names,length,noise,width,sen_pause],out_aud)
    cancel_btn.click(None,None,None,cancels=[f1,f2,f3,f4,f5,f6,f7,f8])
    # Preset radios write into the corresponding sliders.
    vitess.change(change_speed, inputs=vitess, outputs=length)
    pause.change(change_stop, inputs=pause, outputs=sen_pause)
    # cancel_btn.click(None,None,None,cancels=[f1,f3,f5,f6,f7,f8])
    # Populate the voice dropdown when the page loads.
    b.load(init,None,names)
# b.queue(default_concurrency_limit=20).launch(max_threads=40)
b.launch()