"""Gradio front-end for the Ringg Squirrel TTS HTTP API.

The app fetches the available voices from ``{BASE_URL}/voices``, posts text to
``{BASE_URL}/synthesize``, downloads the resulting audio and keeps a
persistent count of successful generations in ``generation_counter.json``.
"""

import json
import os
import tempfile
import threading
import uuid
from pathlib import Path

import gradio as gr
import requests

# gr.NO_RELOAD = False

# API Base URL
BASE_URL = os.environ.get("BASE_URL", "")

# Counter persistence file
COUNTER_FILE = Path("generation_counter.json")

# Serialises read-modify-write cycles on COUNTER_FILE across concurrent
# requests handled by this process.
_COUNTER_LOCK = threading.Lock()

# Maximum number of characters accepted by the text box.
MAX_TEXT_LENGTH = 500

# Example texts
EXAMPLE_TEXT_ENGLISH = "Welcome to Ringg TTS! This is a text to speech system that can convert your text into natural-sounding audio. Try it out with your own content!"
EXAMPLE_TEXT_HINDI = "नमस्ते! मैं रिंग टीटीएस हूँ। मैं आपके टेक्स्ट को प्राकृतिक आवाज़ में बदल सकता हूँ। कृपया अपना टेक्स्ट यहाँ लिखें और सुनें।"
EXAMPLE_TEXT_MIXED = "Hello दोस्तों! Welcome to Ringg TTS. यह एक बहुत ही शानदार text to speech system है जो Hindi और English दोनों languages को support करता है।"

# Kept at module level (column 0) so the markdown list is not indented and
# renders as a real bullet list.
BEST_PRACTICES_MD = """
### 📝 Best Practices for Best Results
- **Supported Languages:** Hindi and English only
- **Check spelling carefully:** Misspelled words may be mispronounced
- **Punctuation matters:** Use proper punctuation for natural pauses and intonation
- **Technical terms:** Extremely rare or specialized technical terms might be mispronounced
- **Numbers & dates:** Write numbers as words for better pronunciation (e.g., "twenty-five" instead of "25")
"""


def load_counter():
    """Load the generation counter from ``COUNTER_FILE``.

    Returns:
        The stored ``count`` value, or 0 when the file is missing, has no
        ``count`` key, or cannot be read/parsed.
    """
    try:
        if COUNTER_FILE.exists():
            with open(COUNTER_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
            return data.get("count", 0)
    except Exception as e:
        # Best effort: a broken counter file must never take the app down.
        print(f"Error loading counter: {e}")
    return 0


def save_counter(count):
    """Persist ``count`` to ``COUNTER_FILE`` as ``{"count": count}``.

    Errors are logged to stdout and otherwise ignored (best effort).
    """
    try:
        with open(COUNTER_FILE, "w", encoding="utf-8") as f:
            json.dump({"count": count}, f)
    except Exception as e:
        print(f"Error saving counter: {e}")


def increment_counter(session_count=0):
    """Atomically bump the persisted generation counter and return it.

    The value on disk is the source of truth, so several browser sessions
    (each holding its own, possibly stale, ``gr.State`` copy) cannot overwrite
    each other's increments. ``session_count`` is only used as a floor in case
    the file is unreadable at the moment of the call.
    """
    with _COUNTER_LOCK:
        count = max(load_counter(), session_count or 0) + 1
        save_counter(count)
    return count


def get_voices():
    """Fetch the available voices from the API.

    Returns:
        A list of ``(display_name, voice_id)`` tuples sorted by display name,
        or an empty list when the request fails or returns a non-200 status.
    """
    try:
        response = requests.get(f"{BASE_URL}/voices", timeout=10)
        if response.status_code != 200:
            return []

        voices_data = response.json().get("voices", {})
        voices = [
            (
                f"{voice_info.get('name', 'Unknown')} "
                f"({voice_info.get('gender', 'N/A')})",
                voice_id,
            )
            for voice_id, voice_info in voices_data.items()
        ]
        return sorted(voices, key=lambda item: item[0])
    except Exception as e:
        print(f"Error fetching voices: {e}")
        return []


def _failure(message):
    """Build the 8-tuple returned by ``synthesize_speech`` on failure."""
    return None, message, "", "", "", "", "", ""


def synthesize_speech(text, voice_id):
    """Synthesize speech from text via ``{BASE_URL}/synthesize``.

    Args:
        text: Text to convert to speech.
        voice_id: Identifier of the voice to use.

    Returns:
        An 8-tuple ``(audio_url, status_msg, total_time, rtf, wav_duration,
        vocoder_time, no_vocoder_time, rtf_no_vocoder)``. On any failure
        ``audio_url`` is ``None``, ``status_msg`` describes the problem and
        all metric fields are empty strings.
    """
    if not text or not text.strip():
        return _failure("⚠️ Please enter some text")

    if not voice_id:
        return _failure("⚠️ Please select a voice")

    # Print input text length
    text_length = len(text)
    print(f"Input text length: {text_length} characters")

    try:
        payload = {"text": text, "voice_id": voice_id}
        response = requests.post(
            f"{BASE_URL}/synthesize",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=30,
        )

        if response.status_code != 200:
            return _failure(f"❌ API returned status code: {response.status_code}")

        result = response.json()
        if not result.get("success"):
            error_msg = result.get("message", "Unknown error")
            return _failure(f"❌ Synthesis failed: {error_msg}")

        audio_url = result.get("audio_url", "")
        metrics = result.get("metrics", {})

        # Format metrics
        total_time = f"{metrics.get('t', 0):.3f}s"
        rtf = f"{metrics.get('rtf', 0):.4f}"
        wav_duration = f"{metrics.get('wav_seconds', 0):.2f}s"
        vocoder_time = f"{metrics.get('t_vocoder', 0):.3f}s"
        no_vocoder_time = f"{metrics.get('t_no_vocoder', 0):.3f}s"
        rtf_no_vocoder = f"{metrics.get('rtf_no_vocoder', 0):.4f}"

        status_msg = "✅ Audio generated successfully!"

        return (
            audio_url,
            status_msg,
            total_time,
            rtf,
            wav_duration,
            vocoder_time,
            no_vocoder_time,
            rtf_no_vocoder,
        )
    except Exception as e:
        # Network errors, malformed JSON and unexpected metric types all end
        # up here and are reported through the status message.
        return _failure(f"❌ Error: {str(e)}")


def download_audio(audio_url):
    """Download ``audio_url`` into the system temp directory.

    Returns:
        The path of the saved ``.wav`` file as a string, or ``None`` when the
        server answers with a non-200 status.

    Raises:
        requests.RequestException: On network failures.
        OSError: When the file cannot be written.
    """
    audio_response = requests.get(audio_url, timeout=30)
    if audio_response.status_code != 200:
        return None

    # tempfile.gettempdir() instead of a hard-coded "/tmp" keeps this portable.
    audio_path = Path(tempfile.gettempdir()) / f"ringg_{uuid.uuid4()}.wav"
    audio_path.write_bytes(audio_response.content)
    return str(audio_path)


# Load initial counter value
initial_counter = load_counter()

# Create Gradio interface
with gr.Blocks(
    theme=gr.themes.Base(
        font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"]
    ),
    css=".gradio-container {max-width: none !important;}",
) as demo:
    # Title with Health Status
    with gr.Row():
        with gr.Column(scale=4):
            audio_image = gr.HTML(
                value="""
Logo

Ringg Squirrel TTS v1.0 🐿️

"""
            )
        with gr.Column(scale=1):
            generation_counter = gr.Markdown(
                f"**Generations:** {initial_counter}", elem_id="counter"
            )

    # Best Practices Section
    gr.Markdown(BEST_PRACTICES_MD)

    # Text Input
    text_input = gr.Textbox(
        label="Text (max 500 characters)",
        placeholder="Type or paste your text here (max 500 characters)...",
        lines=6,
        max_lines=10,
        max_length=MAX_TEXT_LENGTH,
    )

    # Character count display
    char_count = gr.Markdown(f"**Character count:** 0 / {MAX_TEXT_LENGTH}")

    with gr.Row():
        with gr.Column(scale=1):
            # Voice Selection
            voices = get_voices()
            # Maps the label shown in the dropdown back to the API voice id.
            voice_choices = {display: vid for display, vid in voices}
            voice_dropdown = gr.Dropdown(
                choices=list(voice_choices),
                label="Choose a voice style",
                info=f"{len(voices)} voices available",
                value=voices[0][0] if voices else None,
            )

        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Listen to your audio", type="filepath")

    metrics_header = gr.Markdown("### 📊 Generation Metrics", visible=False)
    metrics_output = gr.Code(
        label="Metrics", language="json", interactive=False, visible=False
    )

    generate_btn = gr.Button("🎬 Generate Speech", variant="primary", size="lg")

    gr.Markdown("#### 🎯 Try these examples:")
    with gr.Row():
        example_btn1 = gr.Button("English Example", size="sm")
        example_btn2 = gr.Button("Hindi Example", size="sm")
        example_btn3 = gr.Button("Mixed Example", size="sm")

    # Footer
    gr.Markdown("---")
    gr.Markdown("# 🙏 Acknowledgements")
    # gr.Markdown("- Based on [ZipVoice](https://github.com/k2-fsa/ZipVoice)")
    gr.Markdown(
        "- Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for his contributions"
    )

    # State variable for generation counter
    gen_count_state = gr.State(value=initial_counter)

    # Event Handlers
    def update_char_count(text):
        """Update character count as user types"""
        count = len(text) if text else 0
        return f"**Character count:** {count} / {MAX_TEXT_LENGTH}"

    def load_example_text(example_text):
        """Load example text and update character count"""
        count = len(example_text)
        return example_text, f"**Character count:** {count} / {MAX_TEXT_LENGTH}"

    def clear_text():
        """Clear text input"""
        return "", f"**Character count:** 0 / {MAX_TEXT_LENGTH}"

    def refresh_counter():
        """Re-read the persisted counter so a freshly opened page is current."""
        count = load_counter()
        return count, f"**Generations:** {count}"

    def on_generate(text, voice_display, gen_count):
        """Run synthesis, download the audio and update metrics and counter.

        Args:
            text: Text from the input box.
            voice_display: Label selected in the voice dropdown.
            gen_count: This session's last known generation count.

        Returns:
            ``(audio_file, metrics_header_update, metrics_output_update,
            gen_count, counter_markdown)`` matching the click outputs.
        """
        voice_id = voice_choices.get(voice_display)
        (
            audio_url,
            status,
            t_time,
            rtf,
            wav_dur,
            voc_time,
            no_voc_time,
            rtf_no_voc,
        ) = synthesize_speech(text, voice_id)

        # Download audio if URL is available
        audio_file = None
        if audio_url:
            try:
                audio_file = download_audio(audio_url)
            except Exception as e:
                status = f"⚠️ Audio generated but download failed: {str(e)}"
            else:
                if audio_file is None:
                    status = (
                        "⚠️ Audio generated but download failed: "
                        "audio server returned a non-200 status"
                    )
                else:
                    # Increment counter only on successful generation; the
                    # helper reads the file so other sessions are not lost.
                    gen_count = increment_counter(gen_count)

        # No status component exists in the layout, so surface failures as a
        # toast instead of silently dropping them.
        if audio_file is None and status:
            gr.Warning(status)

        # Format metrics as JSON string (only if available)
        has_metrics = any([t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc])
        metrics_json = ""
        if has_metrics:
            metrics_json = json.dumps(
                {
                    "total_time": t_time,
                    "rtf": rtf,
                    "audio_duration": wav_dur,
                    "vocoder_time": voc_time,
                    "no_vocoder_time": no_voc_time,
                    "rtf_no_vocoder": rtf_no_voc,
                },
                indent=2,
            )

        return (
            audio_file,
            gr.update(visible=has_metrics),
            gr.update(value=metrics_json, visible=has_metrics),
            gen_count,
            f"**Generations:** {gen_count}",
        )

    # Update character count on text input change
    text_input.change(fn=update_char_count, inputs=[text_input], outputs=[char_count])

    # Example button clicks
    example_btn1.click(
        fn=lambda: load_example_text(EXAMPLE_TEXT_ENGLISH),
        inputs=None,
        outputs=[text_input, char_count],
    )

    example_btn2.click(
        fn=lambda: load_example_text(EXAMPLE_TEXT_HINDI),
        inputs=None,
        outputs=[text_input, char_count],
    )

    example_btn3.click(
        fn=lambda: load_example_text(EXAMPLE_TEXT_MIXED),
        inputs=None,
        outputs=[text_input, char_count],
    )

    generate_btn.click(
        fn=on_generate,
        inputs=[text_input, voice_dropdown, gen_count_state],
        outputs=[
            audio_output,
            metrics_header,
            metrics_output,
            gen_count_state,
            generation_counter,
        ],
    )

    # Refresh the generation counter on page load so every new session starts
    # from the persisted value rather than the value captured at app start.
    demo.load(
        fn=refresh_counter,
        inputs=None,
        outputs=[gen_count_state, generation_counter],
    )

if __name__ == "__main__":
    demo.queue(max_size=5)
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860, debug=True)