Spaces:
Running
Running
| import gradio as gr | |
| import requests | |
| import json | |
| import os | |
| from pathlib import Path | |
| import uuid | |
| # gr.NO_RELOAD = False | |
# Base URL of the TTS backend, read from the environment ("" when unset).
BASE_URL = os.getenv("BASE_URL", "")

# JSON file in which the generation counter is persisted.
COUNTER_FILE = Path("generation_counter.json")

# Sample inputs loaded by the example buttons (English, Hindi, mixed).
EXAMPLE_TEXT_ENGLISH = "Welcome to Ringg TTS! This is a text to speech system that can convert your text into natural-sounding audio. Try it out with your own content!"
EXAMPLE_TEXT_HINDI = "नमस्ते! मैं रिंग टीटीएस हूँ। मैं आपके टेक्स्ट को प्राकृतिक आवाज़ में बदल सकता हूँ। कृपया अपना टेक्स्ट यहाँ लिखें और सुनें।"
EXAMPLE_TEXT_MIXED = "Hello दोस्तों! Welcome to Ringg TTS. यह एक बहुत ही शानदार text to speech system है जो Hindi और English दोनों languages को support करता है।"
def load_counter():
    """Load the persisted generation counter.

    Reads ``COUNTER_FILE`` as JSON and returns its ``"count"`` entry.

    Returns:
        The stored count as a non-negative ``int``. ``0`` is returned when
        the file does not exist, cannot be read or parsed, or does not hold
        a non-negative integer under ``"count"``.
    """
    try:
        if not COUNTER_FILE.exists():
            return 0
        with open(COUNTER_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"Error loading counter: {e}")
        return 0

    count = data.get("count", 0) if isinstance(data, dict) else None
    # Callers do arithmetic on the result, so anything that is not a real,
    # non-negative integer (bool is an int subclass) is treated as corrupt.
    if isinstance(count, int) and not isinstance(count, bool) and count >= 0:
        return count

    print(f"Error loading counter: invalid counter data {data!r}")
    return 0
def save_counter(count):
    """Persist the generation counter to ``COUNTER_FILE``.

    The value is written as ``{"count": count}`` to a temporary file next to
    ``COUNTER_FILE`` and then moved into place, so a failed or interrupted
    write never leaves a truncated counter file behind.

    Args:
        count: Counter value to store; must be JSON-serializable.

    Returns:
        None. Saving is best-effort: any error is printed, not raised, and
        the previously stored value is left untouched.
    """
    # Unique name so overlapping saves never share a temporary file.
    tmp_path = COUNTER_FILE.with_name(f"{COUNTER_FILE.name}.{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump({"count": count}, f)
        # os.replace swaps the file in one step on the same filesystem.
        os.replace(tmp_path, COUNTER_FILE)
    except Exception as e:
        print(f"Error saving counter: {e}")
        try:
            tmp_path.unlink()
        except OSError:
            # The temporary file may never have been created.
            pass
def get_voices():
    """Return the voices offered by the API as ``(display_name, voice_id)`` pairs.

    Queries ``{BASE_URL}/voices`` and labels every entry of the response's
    ``"voices"`` mapping as ``"<name> (<gender>)"``, falling back to
    ``"Unknown"`` / ``"N/A"`` for missing fields. The pairs are sorted by
    display name.

    Returns:
        The sorted list of pairs, or ``[]`` when the API does not answer with
        status 200 or when anything fails (the error is printed).
    """
    try:
        reply = requests.get(f"{BASE_URL}/voices", timeout=10)
        if reply.status_code != 200:
            return []

        catalogue = reply.json().get("voices", {})
        labelled = [
            (f"{info.get('name', 'Unknown')} ({info.get('gender', 'N/A')})", vid)
            for vid, info in catalogue.items()
        ]
        labelled.sort(key=lambda pair: pair[0])
        return labelled
    except Exception as e:
        print(f"Error fetching voices: {e}")
        return []
def _synthesis_failure(message):
    """Build the result tuple for a failed synthesis: no audio, empty metrics."""
    return (None, message, "", "", "", "", "", "")


def _metric_as_float(metrics, key):
    """Return ``metrics[key]`` as a float, or ``0.0`` when missing or non-numeric."""
    try:
        return float(metrics.get(key, 0))
    except (TypeError, ValueError):
        return 0.0


def synthesize_speech(text, voice_id):
    """Synthesize speech for ``text`` with the given voice via the TTS API.

    Posts ``{"text": ..., "voice_id": ...}`` to ``{BASE_URL}/synthesize`` and
    formats the timing metrics of a successful response for display.

    Args:
        text: Text to synthesize; rejected when empty or whitespace-only.
        voice_id: Identifier of the voice to use; rejected when falsy.

    Returns:
        An 8-tuple ``(audio_url, status_message, total_time, rtf,
        wav_duration, vocoder_time, no_vocoder_time, rtf_no_vocoder)``.
        On failure ``audio_url`` is ``None``, ``status_message`` describes
        the problem and the six metric fields are empty strings. Request and
        response-parsing errors are caught and reported through
        ``status_message`` rather than raised.
    """
    if not text or not text.strip():
        return _synthesis_failure("⚠️ Please enter some text")
    if not voice_id:
        return _synthesis_failure("⚠️ Please select a voice")

    print(f"Input text length: {len(text)} characters")

    try:
        response = requests.post(
            f"{BASE_URL}/synthesize",
            headers={"Content-Type": "application/json"},
            json={"text": text, "voice_id": voice_id},
            timeout=30,
        )
        if response.status_code != 200:
            return _synthesis_failure(
                f"❌ API returned status code: {response.status_code}"
            )

        result = response.json()
        if not result.get("success"):
            error_msg = result.get("message", "Unknown error")
            return _synthesis_failure(f"❌ Synthesis failed: {error_msg}")

        audio_url = result.get("audio_url", "")
        metrics = result.get("metrics")
    except Exception as e:
        return _synthesis_failure(f"❌ Error: {str(e)}")

    # A missing, null or malformed metrics payload must not turn a successful
    # synthesis into an error; unusable values are shown as zero instead.
    if not isinstance(metrics, dict):
        metrics = {}

    return (
        audio_url,
        "✅ Audio generated successfully!",
        f"{_metric_as_float(metrics, 't'):.3f}s",
        f"{_metric_as_float(metrics, 'rtf'):.4f}",
        f"{_metric_as_float(metrics, 'wav_seconds'):.2f}s",
        f"{_metric_as_float(metrics, 't_vocoder'):.3f}s",
        f"{_metric_as_float(metrics, 't_no_vocoder'):.3f}s",
        f"{_metric_as_float(metrics, 'rtf_no_vocoder'):.4f}",
    )
# Counter value persisted by earlier runs; seeds the UI built below.
initial_counter = load_counter()

# Build the Gradio interface.
# NOTE(review): the original indentation was lost in extraction, so the
# nesting of widgets inside rows/columns below is reconstructed — confirm the
# placement of the metrics widgets and the generate button against the
# deployed layout.
with gr.Blocks(
    theme=gr.themes.Base(
        font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"]
    ),
    css=".gradio-container {max-width: none !important;}",
) as demo:
    # Title row: logo and heading on the left, generation counter on the right.
    with gr.Row():
        with gr.Column(scale=4):
            audio_image = gr.HTML(
                value="""
<div style="display: flex; align-items: center; gap: 10px;">
<img style="width: 50px; height: 50px; background-color: white; border-radius: 10%;" src="https://storage.googleapis.com/desivocal-prod/desi-vocal/ringg.svg" alt="Logo">
<h1 style="margin: 0;">Ringg Squirrel TTS v1.0 🐿️</h1>
</div>
"""
            )
        with gr.Column(scale=1):
            generation_counter = gr.Markdown(
                f"**Generations:** {initial_counter}", elem_id="counter"
            )

    # Best Practices Section
    gr.Markdown("""
### 📝 Best Practices for Best Results
- **Supported Languages:** Hindi and English only
- **Check spelling carefully:** Misspelled words may be mispronounced
- **Punctuation matters:** Use proper punctuation for natural pauses and intonation
- **Technical terms:** Extremely rare or specialized technical terms might be mispronounced
- **Numbers & dates:** Write numbers as words for better pronunciation (e.g., "twenty-five" instead of "25")
""")

    # Text Input
    text_input = gr.Textbox(
        label="Text (max 500 characters)",
        placeholder="Type or paste your text here (max 500 characters)...",
        lines=6,
        max_lines=10,
        max_length=500,
    )

    # Character count display
    char_count = gr.Markdown("**Character count:** 0 / 500")

    with gr.Row():
        with gr.Column(scale=1):
            # Voice Selection: the dropdown shows display names, and
            # voice_choices maps each display name back to its voice id.
            voices = get_voices()
            voice_choices = {display: vid for display, vid in voices}
            voice_dropdown = gr.Dropdown(
                choices=list(voice_choices.keys()),
                label="Choose a voice style",
                info=f"{len(voices)} voices available",
                value=list(voice_choices.keys())[0] if voices else None,
            )
        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Listen to your audio", type="filepath")
            # Hidden until a generation returns metrics.
            metrics_header = gr.Markdown("### 📊 Generation Metrics", visible=False)
            metrics_output = gr.Code(
                label="Metrics", language="json", interactive=False, visible=False
            )

    generate_btn = gr.Button("🎬 Generate Speech", variant="primary", size="lg")

    gr.Markdown("#### 🎯 Try these examples:")
    with gr.Row():
        example_btn1 = gr.Button("English Example", size="sm")
        example_btn2 = gr.Button("Hindi Example", size="sm")
        example_btn3 = gr.Button("Mixed Example", size="sm")

    # Footer
    gr.Markdown("---")
    gr.Markdown("# 🙏 Acknowledgements")
    # gr.Markdown("- Based on [ZipVoice](https://github.com/k2-fsa/ZipVoice)")
    gr.Markdown(
        "- Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for his contributions"
    )

    # Per-session copy of the generation counter.
    gen_count_state = gr.State(value=initial_counter)

    # Event Handlers
    def update_char_count(text):
        """Return the character-count label for the current text."""
        count = len(text) if text else 0
        return f"**Character count:** {count} / 500"

    def load_example_text(example_text):
        """Return an example text together with its character-count label."""
        count = len(example_text)
        return example_text, f"**Character count:** {count} / 500"

    def clear_text():
        """Return an empty text and the reset character-count label."""
        return "", "**Character count:** 0 / 500"

    def refresh_counter():
        """Return the persisted count as ``(state value, counter label)``."""
        count = load_counter()
        return count, f"**Generations:** {count}"

    def on_generate(text, voice_display, gen_count):
        """Handle a click on the generate button.

        Synthesizes the text, downloads the resulting audio to a temporary
        file and, on success, bumps and persists the generation counter.
        Failures are shown to the user as a warning toast.

        Args:
            text: Contents of the text box.
            voice_display: Display name selected in the voice dropdown.
            gen_count: Generation count held in this session's state.

        Returns:
            A 5-tuple matching the click outputs: the audio file path (or
            ``None``), a visibility update for the metrics header, a
            value/visibility update for the metrics box, the new counter
            state and the counter label.
        """
        import tempfile  # local import: not in the file's import block

        voice_id = voice_choices.get(voice_display)
        audio_url, status, t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc = (
            synthesize_speech(text, voice_id)
        )

        # synthesize_speech returns None as the URL on failure; an empty URL
        # means the API reported success without providing any audio.
        problem = None
        audio_file = None
        if audio_url is None:
            problem = status
        elif not audio_url:
            problem = "⚠️ Audio generated but no audio URL was returned"
        else:
            try:
                audio_response = requests.get(audio_url, timeout=30)
                if audio_response.status_code == 200:
                    # Use the platform temp dir instead of a hard-coded /tmp.
                    audio_file = os.path.join(
                        tempfile.gettempdir(), f"ringg_{uuid.uuid4()}.wav"
                    )
                    with open(audio_file, "wb") as f:
                        f.write(audio_response.content)
                else:
                    problem = (
                        "⚠️ Audio generated but download failed: "
                        f"HTTP {audio_response.status_code}"
                    )
            except Exception as e:
                # Never hand a missing or partially written file to the player.
                audio_file = None
                problem = f"⚠️ Audio generated but download failed: {str(e)}"

        if audio_file is not None:
            # The counter is shared by all sessions through the counter file,
            # so start from the larger of this session's value and the stored
            # one; otherwise sessions overwrite each other's increments.
            gen_count = max(gen_count, load_counter()) + 1
            save_counter(gen_count)
        elif problem:
            # No status widget is wired up, so surface failures as a toast.
            gr.Warning(problem)

        # Format metrics as JSON string (only if available)
        has_metrics = any([t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc])
        metrics_json = ""
        if has_metrics:
            metrics_json = json.dumps(
                {
                    "total_time": t_time,
                    "rtf": rtf,
                    "audio_duration": wav_dur,
                    "vocoder_time": voc_time,
                    "no_vocoder_time": no_voc_time,
                    "rtf_no_vocoder": rtf_no_voc,
                },
                indent=2,
            )

        return (
            audio_file,
            gr.update(visible=has_metrics),
            gr.update(value=metrics_json, visible=has_metrics),
            gen_count,
            f"**Generations:** {gen_count}",
        )

    # Update character count on text input change
    text_input.change(fn=update_char_count, inputs=[text_input], outputs=[char_count])

    # Example button clicks
    example_btn1.click(
        fn=lambda: load_example_text(EXAMPLE_TEXT_ENGLISH),
        inputs=None,
        outputs=[text_input, char_count],
    )
    example_btn2.click(
        fn=lambda: load_example_text(EXAMPLE_TEXT_HINDI),
        inputs=None,
        outputs=[text_input, char_count],
    )
    example_btn3.click(
        fn=lambda: load_example_text(EXAMPLE_TEXT_MIXED),
        inputs=None,
        outputs=[text_input, char_count],
    )

    generate_btn.click(
        fn=on_generate,
        inputs=[text_input, voice_dropdown, gen_count_state],
        outputs=[
            audio_output,
            # status_output,
            metrics_header,
            metrics_output,
            gen_count_state,
            generation_counter,
        ],
    )

    # Refresh the counter on page load so a new session starts from the
    # persisted value rather than the value captured when the app was built.
    demo.load(
        fn=refresh_counter,
        inputs=None,
        outputs=[gen_count_state, generation_counter],
    )
if __name__ == "__main__":
    # Keep at most five requests waiting in the queue.
    demo.queue(max_size=5)

    # Serve on every interface at port 7860, without a public share link.
    launch_options = dict(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        debug=True,
    )
    demo.launch(**launch_options)